1use std;
17
18use abortable_parser::combinators::*;
19use abortable_parser::iter::SliceIter;
20use abortable_parser::{Error, Result};
21
22use crate::ast::*;
23use crate::error::BuildError;
24use crate::iter::OffsetStrIter;
25
26pub type CommentGroup = Vec<Token>;
27pub type CommentMap = std::collections::BTreeMap<usize, CommentGroup>;
28
29fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, u8> {
30 let mut _i = i.clone();
31 let c = match _i.next() {
32 Some(c) => *c,
33 None => {
34 return Result::Fail(Error::new(
35 "Unexpected End of Input".to_string(),
36 Box::new(_i.clone()),
37 ));
38 }
39 };
40 if (c as char).is_ascii_alphanumeric() || c == b'-' || c == b'_' {
41 Result::Complete(_i, c)
42 } else {
43 Result::Fail(Error::new(
44 "Not a symbol character".to_string(),
45 Box::new(_i.clone()),
46 ))
47 }
48}
49
50fn escapequoted<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, String> {
51 let mut frag = String::new();
54 let mut escape = false;
55 let mut _input = input.clone();
56 loop {
57 let c = match _input.next() {
58 Some(c) => *c,
59 None => break,
60 };
61 if escape {
62 match c as char {
63 'n' => {
64 frag.push('\n');
65 escape = false;
66 continue;
67 }
68 'r' => {
69 frag.push('\r');
70 escape = false;
71 continue;
72 }
73 't' => {
74 frag.push('\t');
75 escape = false;
76 continue;
77 }
78 _ => {
79 }
81 }
82 }
83 if c == '\\' as u8 && !escape {
84 escape = true;
86 } else if c == '"' as u8 && !escape {
87 return Result::Complete(_input, frag);
90 } else {
91 frag.push(c as char);
93 escape = false; }
95 }
96 return Result::Incomplete(_input.clone());
97}
98
99make_fn!(strtok<OffsetStrIter, Token>,
100 do_each!(
101 span => input!(),
102 _ => text_token!("\""),
103 frag => escapequoted,
104 (Token{
105 typ: TokenType::QUOTED,
106 pos: Position::from(&span),
107 fragment: frag.to_string(),
108 })
109 )
110);
111
112make_fn!(barewordtok<OffsetStrIter, Token>,
113 do_each!(
114 span => input!(),
115 _ => peek!(ascii_alpha),
116 frag => consume_all!(is_symbol_char),
117 (Token{
118 typ: TokenType::BAREWORD,
119 pos: Position::from(&span),
120 fragment: frag.to_string(),
121 })
122 )
123);
124
125make_fn!(digittok<OffsetStrIter, Token>,
126 do_each!(
127 span => input!(),
128 _ => peek!(ascii_digit),
129 digits => consume_all!(ascii_digit),
130 (Token{
131 typ: TokenType::DIGIT,
132 pos: Position::from(&span),
133 fragment: digits.to_string(),
134 })
135 )
136);
137
138make_fn!(booleantok<OffsetStrIter, Token>,
139 do_each!(
140 span => input!(),
141 token => either!(
142 text_token!("true"),
143 text_token!("false")
144 ),
145 (Token{
146 typ: TokenType::BOOLEAN,
147 pos: Position::from(&span),
148 fragment: token.to_string(),
149 })
150 )
151);
152
/// Builds a parser expression that matches a fixed piece of text and wraps it
/// in a `Token` of the given type, positioned at the start of the match.
///
/// The first argument is the input expression handed on to `do_each!`. A
/// trailing `WS` argument additionally requires the text to be followed by
/// whitespace or a comment, which is consumed and discarded.
macro_rules! do_text_token_tok {
    // Keyword form: the text must be followed by whitespace or a comment.
    ($i:expr, $typ:expr, $text:expr, WS) => {
        do_each!($i,
            start => input!(),
            matched => text_token!($text),
            _ => either!(whitespace, comment),
            (Token {
                typ: $typ,
                pos: Position::from(&start),
                fragment: matched.to_string(),
            })
        )
    };

    // Plain form: only the text itself is matched.
    ($i:expr, $typ:expr, $text:expr) => {
        do_each!($i,
            start => input!(),
            matched => text_token!($text),
            (Token {
                typ: $typ,
                pos: Position::from(&start),
                fragment: matched.to_string(),
            })
        )
    };
}
181
182make_fn!(emptytok<OffsetStrIter, Token>,
183 do_text_token_tok!(TokenType::EMPTY, "NULL")
184);
185
186make_fn!(commatok<OffsetStrIter, Token>,
187 do_text_token_tok!(TokenType::PUNCT, ",")
188);
189
190make_fn!(lbracetok<OffsetStrIter, Token>,
191 do_text_token_tok!(TokenType::PUNCT, "{")
192);
193
194make_fn!(rbracetok<OffsetStrIter, Token>,
195 do_text_token_tok!(TokenType::PUNCT, "}")
196);
197
198make_fn!(lparentok<OffsetStrIter, Token>,
199 do_text_token_tok!(TokenType::PUNCT, "(")
200);
201
202make_fn!(rparentok<OffsetStrIter, Token>,
203 do_text_token_tok!(TokenType::PUNCT, ")")
204);
205
206make_fn!(dottok<OffsetStrIter, Token>,
207 do_text_token_tok!(TokenType::PUNCT, ".")
208);
209
210make_fn!(plustok<OffsetStrIter, Token>,
211 do_text_token_tok!(TokenType::PUNCT, "+")
212);
213
214make_fn!(dashtok<OffsetStrIter, Token>,
215 do_text_token_tok!(TokenType::PUNCT, "-")
216);
217
218make_fn!(startok<OffsetStrIter, Token>,
219 do_text_token_tok!(TokenType::PUNCT, "*")
220);
221
222make_fn!(slashtok<OffsetStrIter, Token>,
223 do_text_token_tok!(TokenType::PUNCT, "/")
224);
225
226make_fn!(modulustok<OffsetStrIter, Token>,
227 do_text_token_tok!(TokenType::PUNCT, "%%")
228);
229
230make_fn!(pcttok<OffsetStrIter, Token>,
231 do_text_token_tok!(TokenType::PUNCT, "%")
232);
233
234make_fn!(eqeqtok<OffsetStrIter, Token>,
235 do_text_token_tok!(TokenType::PUNCT, "==")
236);
237
238make_fn!(notequaltok<OffsetStrIter, Token>,
239 do_text_token_tok!(TokenType::PUNCT, "!=")
240);
241
242make_fn!(matchtok<OffsetStrIter, Token>,
243 do_text_token_tok!(TokenType::PUNCT, "~")
244);
245
246make_fn!(notmatchtok<OffsetStrIter, Token>,
247 do_text_token_tok!(TokenType::PUNCT, "!~")
248);
249
250make_fn!(gttok<OffsetStrIter, Token>,
251 do_text_token_tok!(TokenType::PUNCT, ">")
252);
253
254make_fn!(gtequaltok<OffsetStrIter, Token>,
255 do_text_token_tok!(TokenType::PUNCT, ">=")
256);
257
258make_fn!(ltequaltok<OffsetStrIter, Token>,
259 do_text_token_tok!(TokenType::PUNCT, "<=")
260);
261
262make_fn!(lttok<OffsetStrIter, Token>,
263 do_text_token_tok!(TokenType::PUNCT, "<")
264);
265
266make_fn!(equaltok<OffsetStrIter, Token>,
267 do_text_token_tok!(TokenType::PUNCT, "=")
268);
269
270make_fn!(semicolontok<OffsetStrIter, Token>,
271 do_text_token_tok!(TokenType::PUNCT, ";")
272);
273
274make_fn!(doublecolontok<OffsetStrIter, Token>,
275 do_text_token_tok!(TokenType::PUNCT, "::")
276);
277
278make_fn!(colontok<OffsetStrIter, Token>,
279 do_text_token_tok!(TokenType::PUNCT, ":")
280);
281
282make_fn!(leftsquarebracket<OffsetStrIter, Token>,
283 do_text_token_tok!(TokenType::PUNCT, "[")
284);
285
286make_fn!(rightsquarebracket<OffsetStrIter, Token>,
287 do_text_token_tok!(TokenType::PUNCT, "]")
288);
289
290make_fn!(fatcommatok<OffsetStrIter, Token>,
291 do_text_token_tok!(TokenType::PUNCT, "=>")
292);
293
294make_fn!(andtok<OffsetStrIter, Token>,
295 do_text_token_tok!(TokenType::PUNCT, "&&")
296);
297
298make_fn!(ortok<OffsetStrIter, Token>,
299 do_text_token_tok!(TokenType::PUNCT, "||")
300);
301
302make_fn!(selecttok<OffsetStrIter, Token>,
303 do_text_token_tok!(TokenType::BAREWORD, "select", WS)
304);
305
306make_fn!(intok<OffsetStrIter, Token>,
307 do_text_token_tok!(TokenType::BAREWORD, "in", WS)
308);
309
310make_fn!(istok<OffsetStrIter, Token>,
311 do_text_token_tok!(TokenType::BAREWORD, "is", WS)
312);
313
314make_fn!(nottok<OffsetStrIter, Token>,
315 do_text_token_tok!(TokenType::BAREWORD, "not", WS)
316);
317
318make_fn!(tracetok<OffsetStrIter, Token>,
319 do_text_token_tok!(TokenType::BAREWORD, "TRACE", WS)
320);
321
322make_fn!(failtok<OffsetStrIter, Token>,
323 do_text_token_tok!(TokenType::BAREWORD, "fail", WS)
324);
325
326make_fn!(functok<OffsetStrIter, Token>,
327 do_text_token_tok!(TokenType::BAREWORD, "func", WS)
328);
329
330make_fn!(moduletok<OffsetStrIter, Token>,
331 do_text_token_tok!(TokenType::BAREWORD, "module", WS)
332);
333
334make_fn!(lettok<OffsetStrIter, Token>,
335 do_text_token_tok!(TokenType::BAREWORD, "let", WS)
336);
337
338make_fn!(importtok<OffsetStrIter, Token>,
339 do_text_token_tok!(TokenType::BAREWORD, "import", WS)
340);
341
342make_fn!(includetok<OffsetStrIter, Token>,
343 do_text_token_tok!(TokenType::BAREWORD, "include", WS)
344);
345
346make_fn!(asserttok<OffsetStrIter, Token>,
347 do_text_token_tok!(TokenType::BAREWORD, "assert", WS)
348);
349
350make_fn!(outtok<OffsetStrIter, Token>,
351 do_text_token_tok!(TokenType::BAREWORD, "out", WS)
352);
353
354make_fn!(converttok<OffsetStrIter, Token>,
355 do_text_token_tok!(TokenType::BAREWORD, "convert", WS)
356);
357
358make_fn!(astok<OffsetStrIter, Token>,
359 do_text_token_tok!(TokenType::BAREWORD, "as", WS)
360);
361
362make_fn!(maptok<OffsetStrIter, Token>,
363 do_text_token_tok!(TokenType::BAREWORD, "map", WS)
364);
365
366make_fn!(filtertok<OffsetStrIter, Token>,
367 do_text_token_tok!(TokenType::BAREWORD, "filter", WS)
368);
369
370make_fn!(reducetok<OffsetStrIter, Token>,
371 do_text_token_tok!(TokenType::BAREWORD, "reduce", WS)
372);
373
374fn comment(input: OffsetStrIter) -> Result<OffsetStrIter, Token> {
375 match text_token!(input, "//") {
376 Result::Complete(rest, _) => {
377 match until!(
378 rest,
379 either!(
380 eoi,
381 discard!(text_token!("\r\n")),
382 discard!(text_token!("\n"))
383 )
384 ) {
385 Result::Complete(rest, cmt) => {
386 let rest =
388 match optional!(rest, either!(text_token!("\r\n"), text_token!("\n"))) {
389 Result::Complete(next_rest, _) => next_rest,
390 _ => rest,
391 };
392 return Result::Complete(rest, make_tok!(CMT => cmt.to_string(), input));
393 }
394 _ => {
396 return Result::Abort(Error::new(
397 "Unparsable comment".to_string(),
398 Box::new(rest.clone()),
399 ));
400 }
401 }
402 }
403 Result::Incomplete(ctx) => return Result::Incomplete(ctx),
404 Result::Fail(e) => return Result::Fail(e),
405 Result::Abort(e) => return Result::Abort(e),
406 }
407}
408
409make_fn!(whitespace<OffsetStrIter, Token>,
410 do_each!(
411 span => input!(),
412 _ => peek!(ascii_ws),
413 _ => repeat!(ascii_ws),
414 (Token{
415 typ: TokenType::WS,
416 pos: Position::from(&span),
417 fragment: String::new(),
418 })
419 )
420);
421
422make_fn!(end_of_input<OffsetStrIter, Token>,
423 do_each!(
424 span => input!(),
425 _ => eoi,
426 (Token{
427 typ: TokenType::END,
428 pos: Position::from(&span),
429 fragment: String::new(),
430 })
431 )
432);
433
434fn token<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, Token> {
435 either!(
436 input,
437 strtok,
438 emptytok, digittok,
440 commatok,
441 rbracetok,
442 lbracetok,
443 lparentok,
444 rparentok,
445 dottok,
446 andtok,
447 ortok,
448 plustok,
449 dashtok,
450 startok,
451 comment, slashtok,
453 modulustok,
454 pcttok,
455 eqeqtok,
456 notequaltok,
457 matchtok,
458 notmatchtok,
459 complete!("Not >=".to_string(), gtequaltok),
460 complete!("Not <=".to_string(), ltequaltok),
461 gttok,
462 lttok,
463 fatcommatok, equaltok,
465 semicolontok,
466 doublecolontok,
467 colontok,
468 leftsquarebracket,
469 rightsquarebracket,
470 booleantok,
471 intok,
472 istok,
473 nottok,
474 lettok,
475 outtok,
476 converttok,
477 selecttok,
478 asserttok,
479 failtok,
480 tracetok,
481 functok,
482 moduletok,
483 importtok,
484 includetok,
485 astok,
486 maptok,
487 filtertok,
488 reducetok,
489 barewordtok,
490 whitespace,
491 end_of_input
492 )
493}
494
495pub fn tokenize<'a>(
499 input: OffsetStrIter<'a>,
500 mut comment_map: Option<&mut CommentMap>,
501) -> std::result::Result<Vec<Token>, BuildError> {
502 let mut out = Vec::new();
503 let mut i = input.clone();
504 let mut comment_group = Vec::new();
505 let mut comment_was_last: Option<Token> = None;
506 loop {
507 if let Result::Complete(_, _) = eoi(i.clone()) {
508 break;
509 }
510 match token(i.clone()) {
511 Result::Abort(e) => {
512 return Err(BuildError::from(e));
513 }
514 Result::Fail(e) => {
515 return Err(BuildError::from(e));
516 }
517 Result::Incomplete(_offset) => {
518 let err =
519 abortable_parser::Error::new("Invalid Token encountered", Box::new(i.clone()));
520 return Err(BuildError::from(err));
521 }
522 Result::Complete(rest, tok) => {
523 i = rest;
524 match (&mut comment_map, &tok.typ) {
525 (&mut Some(_), &TokenType::COMMENT) => {
527 comment_group.push(tok.clone());
528 comment_was_last = Some(tok.clone());
529 continue;
530 }
531 (&mut Some(ref mut map), _) => {
532 if tok.typ != TokenType::WS {
533 out.push(tok);
534 }
535 if let Some(tok) = comment_was_last {
536 map.insert(tok.pos.line, comment_group);
537 comment_group = Vec::new();
538 }
539 }
540 (None, TokenType::WS) | (None, TokenType::COMMENT) => continue,
542 (None, _) => {
543 out.push(tok);
544 }
545 }
546 comment_was_last = None;
547 }
548 }
549 }
550 if let Some(ref mut map) = comment_map {
553 if let Some(ref tok) = comment_group.last() {
554 let line = tok.pos.line;
555 map.insert(line, comment_group);
556 }
557 }
558 out.push(Token {
560 fragment: String::new(),
561 typ: TokenType::END,
562 pos: Position::from(&i),
563 });
564 Ok(out)
565}
566
567pub fn token_clone(t: &Token) -> std::result::Result<Token, Error<SliceIter<Token>>> {
571 Ok(t.clone())
572}
573
/// Matches the next token of a token iterator by `TokenType`.
///
/// `match_type!(i, KIND)` clones the token; `match_type!(i, KIND => handler)`
/// passes the token to `handler`, which returns a `std::result::Result`.
/// `KIND` is one of `BOOLEAN`, `COMMENT`, `BAREWORD`, `EMPTY`, `STR`, `DIGIT`
/// or `PUNCT`.
///
/// The expansion is `Complete` with the advanced iterator and the handler's
/// value on a match, and `Fail` when the input is exhausted, the token has a
/// different type, or the handler returns `Err`.
macro_rules! match_type {
    ($i:expr,BOOLEAN => $h:expr) => {
        match_type!($i, TokenType::BOOLEAN, "Not a Boolean", $h)
    };

    // The handler-less arms import `token_clone` themselves so they do not
    // depend on the call site having it in scope (as `match_token!` does).
    ($i:expr,BOOLEAN) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, BOOLEAN => token_clone)
    }};

    ($i:expr,COMMENT => $h:expr) => {
        match_type!($i, TokenType::COMMENT, "Not a Comment", $h)
    };

    ($i:expr,COMMENT) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, COMMENT => token_clone)
    }};

    ($i:expr,BAREWORD => $h:expr) => {
        match_type!($i, TokenType::BAREWORD, "Not a Bareword", $h)
    };

    ($i:expr,BAREWORD) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, BAREWORD => token_clone)
    }};

    ($i:expr,EMPTY => $h:expr) => {
        match_type!($i, TokenType::EMPTY, "Not NULL", $h)
    };

    ($i:expr,EMPTY) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, EMPTY => token_clone)
    }};

    ($i:expr,STR => $h:expr) => {
        match_type!($i, TokenType::QUOTED, "Not a String", $h)
    };

    ($i:expr,STR) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, STR => token_clone)
    }};

    ($i:expr,DIGIT => $h:expr) => {
        match_type!($i, TokenType::DIGIT, "Not a DIGIT", $h)
    };

    ($i:expr,DIGIT) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, DIGIT => token_clone)
    }};

    ($i:expr,PUNCT => $h:expr) => {
        match_type!($i, TokenType::PUNCT, "Not PUNCTUATION", $h)
    };

    ($i:expr,PUNCT) => {{
        use crate::tokenizer::token_clone;
        match_type!($i, PUNCT => token_clone)
    }};

    // Worker arm: input, expected token type, failure message, handler.
    ($i:expr, $t:expr, $msg:expr, $h:expr) => {{
        use abortable_parser::combinators::eoi;
        use abortable_parser::{Error, Result};
        use std;

        let mut _i = $i.clone();
        if eoi(_i.clone()).is_complete() {
            Result::Fail(Error::new(format!("End of Input! {}", $msg), Box::new(_i)))
        } else {
            match _i.next() {
                Some(tok) if tok.typ == $t => match $h(tok) {
                    std::result::Result::Ok(v) => Result::Complete(_i.clone(), v),
                    std::result::Result::Err(e) => {
                        Result::Fail(Error::caused_by($msg, Box::new(e), Box::new(_i)))
                    }
                },
                // Wrong token type, or nothing left: fail at the original input.
                _ => Result::Fail(Error::new($msg.to_string(), Box::new($i))),
            }
        }
    }};
}
660
/// Matches the next token of a token iterator by both `TokenType` and exact
/// fragment text.
///
/// `match_token!(i, PUNCT => text)` and `match_token!(i, BAREWORD => text)`
/// clone the matched token; a trailing handler argument receives the token
/// instead and returns a `std::result::Result`.
///
/// The expansion is `Complete` with the advanced iterator and the handler's
/// value on a match, and `Fail` when the input is exhausted, the token does
/// not match, or the handler returns `Err`.
macro_rules! match_token {
    ($i:expr,PUNCT => $f:expr) => {{
        use crate::tokenizer::token_clone;
        match_token!($i, PUNCT => $f, token_clone)
    }};

    ($i:expr,PUNCT => $f:expr, $h:expr) => {
        match_token!($i, TokenType::PUNCT, $f, format!("({})", $f), $h)
    };

    ($i:expr,BAREWORD => $f:expr) => {{
        use crate::tokenizer::token_clone;
        match_token!($i, BAREWORD => $f, token_clone)
    }};

    // The worker arm already wraps the message in "Expected … but got (…)",
    // so only the description of what was expected is passed down.
    ($i:expr,BAREWORD => $f:expr, $h:expr) => {
        match_token!(
            $i,
            TokenType::BAREWORD,
            $f,
            format!("BAREWORD ({})", $f),
            $h
        )
    };

    // Worker arm: input, expected type, expected fragment, description, handler.
    ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{
        // `Error` is imported here so the macro does not rely on the call
        // site having it in scope.
        use abortable_parser::{Error, Result};
        use std;
        let mut i_ = $i.clone();
        match i_.next() {
            Some(tok) if tok.typ == $t && &tok.fragment == $f => match $h(tok) {
                std::result::Result::Ok(v) => Result::Complete(i_.clone(), v),
                std::result::Result::Err(e) => {
                    Result::Fail(Error::caused_by($msg, Box::new(e), Box::new(i_)))
                }
            },
            Some(tok) => Result::Fail(Error::new(
                format!("Expected {} but got ({})", $msg, tok.fragment),
                Box::new($i.clone()),
            )),
            None => Result::Fail(Error::new("Unexpected End Of Input", Box::new(i_))),
        }
    }};
}
712
/// Matches a `PUNCT` token whose fragment equals the given text; shorthand
/// for `match_token!(i, PUNCT => text)`.
macro_rules! punct {
    ($i:expr, $text:expr) => {
        match_token!($i, PUNCT => $text)
    };
}
719
/// Matches a `BAREWORD` token whose fragment equals the given text; shorthand
/// for `match_token!(i, BAREWORD => text)`.
macro_rules! word {
    ($i:expr, $text:expr) => {
        match_token!($i, BAREWORD => $text)
    };
}
726
727pub fn pos<'a>(i: SliceIter<'a, Token>) -> Result<SliceIter<'a, Token>, Position> {
729 let mut _i = i.clone();
730 let tok = _i.next().unwrap();
731 let pos = tok.pos.clone();
732 Result::Complete(i, pos)
733}
734
735#[cfg(test)]
736mod test;