use crate::oper::{Assoc, Fixity, OperArg, OperDef, OperDefs};
use crate::parser::TokenID;
use anyhow::{Error, Result, anyhow, bail};
use arena_terms::{Arena, Term, View};
use chrono::{DateTime, FixedOffset, Utc};
use smartstring::alias::String;
use std::io::{self, BufReader, Read};
use std::iter::FusedIterator;
use std::mem;

include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));

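/// Semantic value attached to a lexed token: nothing, a constructed [`Term`],
/// or a numeric index.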
#[derive(Debug, Clone, Copy, Default)]
pub enum Value {
    #[default]
    None,
    Term(Term),
    Index(usize),
}

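// Generates `TryFrom<Value>` impls that succeed only for the named variant and
// report an "invalid value" error otherwise.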
macro_rules! impl_tryfrom_value {
    ( $( $Variant:ident => $ty:ty ),+ $(,)? ) => {
        $(
            impl ::core::convert::TryFrom<Value> for $ty {
                type Error = ::anyhow::Error;
                fn try_from(v: Value) -> ::anyhow::Result<Self> {
                    match v {
                        Value::$Variant(x) => Ok(x),
                        _ => ::anyhow::bail!(
                            "invalid value: expected {}",
                            stringify!($Variant),
                        ),
                    }
                }
            }
        )+
    };
}

impl_tryfrom_value! {
    Term => Term,
    Index => usize,
}

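// `Option<Term>` gets a hand-written conversion so that `Value::None` maps to
// `Ok(None)` rather than an error.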
impl TryFrom<Value> for Option<Term> {
    type Error = Error;
    fn try_from(v: Value) -> Result<Self> {
        match v {
            Value::None => Ok(None),
            Value::Term(x) => Ok(Some(x)),
            _ => ::anyhow::bail!("invalid value: expected Term or None"),
        }
    }
}

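/// A token produced by [`TermLexer`]: its id, optional semantic [`Value`], the
/// line it was found on, and, for operator tokens, the index of the matching
/// entry in the operator table.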
#[derive(Debug, Clone)]
pub struct TermToken {
    pub token_id: TokenID,
    pub value: Value,
    pub line_no: usize,
    pub op_tab_index: Option<usize>,
}

impl TermToken {
    #[must_use]
    pub fn new(token_id: TokenID, value: Value, line_no: usize) -> Self {
        Self {
            token_id,
            value,
            line_no,
            op_tab_index: None,
        }
    }
}

impl Token for TermToken {
    type TokenID = TokenID;

    fn token_id(&self) -> Self::TokenID {
        self.token_id
    }
    fn line_no(&self) -> usize {
        self.line_no
    }
}

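/// Parses a date/time string into milliseconds since the Unix epoch (UTC).
/// With `fmt == None` the input must be RFC 3339; otherwise `fmt` is a chrono
/// format string that includes a UTC offset.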
fn parse_date_to_epoch(s: &str, fmt: Option<&str>) -> Result<i64> {
    let dt_fixed: DateTime<FixedOffset> = match fmt {
        None => DateTime::parse_from_rfc3339(s)?,
        Some(layout) => DateTime::parse_from_str(s, layout)?,
    };
    let dt_utc = dt_fixed.with_timezone(&Utc);
    Ok(dt_utc.timestamp_millis())
}

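/// Parses an integer in the given base, treating an empty string as 0 and
/// turning `from_str_radix` failures into readable errors.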
fn parse_i64(s: &str, base: u32) -> Result<i64> {
    if s.is_empty() {
        return Ok(0);
    }
    match i64::from_str_radix(s, base) {
        Ok(n) => Ok(n),
        Err(e) if e.kind() == &std::num::IntErrorKind::InvalidDigit => {
            bail!("digit not valid for base")
        }
        Err(_) => bail!("number overflowed i64"),
    }
}

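/// Lexer that turns a fused byte stream into [`TermToken`]s. Besides the
/// generated lexer context it tracks nesting depths (parens/brackets, block
/// comments, `{}` string substitutions, script braces) and the scratch state
/// used for counted or labelled `bin{}`/`text{}` literals and `date{}` parsing.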
pub struct TermLexer<I>
where
    I: FusedIterator<Item = u8>,
{
    ctx: LexerCtx<I, <Self as Lexer<Arena>>::LexerData, <Self as Lexer<Arena>>::Token>,
    pub opers: OperDefs,
    nest_count: isize,
    comment_nest_count: isize,
    curly_nest_count: isize,
    script_curly_nest_count: isize,
    bin_count: isize,
    bin_label: Vec<u8>,
    date_format: String,
}

impl<I> TermLexer<I>
where
    I: FusedIterator<Item = u8>,
{
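    /// Creates a lexer over `input`, using the supplied operator definitions
    /// or an empty table when `opers` is `None`.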
    pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
        Ok(Self {
            ctx: LexerCtx::try_new(input)?,
            opers: match opers {
                Some(opers) => opers,
                None => OperDefs::new(),
            },
            nest_count: 0,
            comment_nest_count: 0,
            curly_nest_count: 0,
            script_curly_nest_count: 0,
            bin_count: 0,
            bin_label: Vec::new(),
            date_format: String::new(),
        })
    }

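    // The `yield_*` helpers wrap `yield_token`, stamping each token with the
    // current line number and the appropriate `Value` / operator-table index.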
    fn yield_id(&mut self, token_id: TokenID) {
        self.yield_token(TermToken {
            token_id,
            value: Value::None,
            line_no: self.ctx().line_no,
            op_tab_index: None,
        });
    }

    fn yield_term(&mut self, token_id: TokenID, term: Term) {
        self.yield_token(TermToken {
            token_id,
            value: Value::Term(term),
            line_no: self.ctx().line_no,
            op_tab_index: None,
        });
    }

    fn yield_index(&mut self, token_id: TokenID, index: usize) {
        self.yield_token(TermToken {
            token_id,
            value: Value::Index(index),
            line_no: self.ctx().line_no,
            op_tab_index: None,
        });
    }

    fn yield_optab(&mut self, token_id: TokenID, term: Term, op_tab_index: Option<usize>) {
        self.yield_token(TermToken {
            token_id,
            value: Value::Term(term),
            line_no: self.ctx().line_no,
            op_tab_index,
        });
    }
}

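// `Lexer` trait hook-up: `ctx`/`ctx_mut` expose the shared lexer state (mode,
// buffers, line counter), and `action` is the per-rule callback driven by the
// tables included from `lexer_data.rs`.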
impl<I> Lexer<Arena> for TermLexer<I>
where
    I: FusedIterator<Item = u8>,
{
    type Input = I;
    type LexerData = LexData;
    type Token = TermToken;

    fn ctx(&self) -> &LexerCtx<Self::Input, Self::LexerData, Self::Token> {
        &self.ctx
    }

    fn ctx_mut(&mut self) -> &mut LexerCtx<Self::Input, Self::LexerData, Self::Token> {
        &mut self.ctx
    }

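    /// Handles one matched lexer rule: updates mode and nesting state, builds
    /// literals out of the scratch buffers, and emits the resulting tokens.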
    fn action(
        &mut self,
        arena: &mut Arena,
        rule: <Self::LexerData as LexerData>::LexerRule,
    ) -> Result<()> {
        log::trace!(
            "ACTION begin: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
            self.ctx().mode,
            rule,
            str::from_utf8(&self.ctx().buffer),
            str::from_utf8(&self.ctx().buffer2),
            str::from_utf8(&self.bin_label),
            self.ctx().accum_flag,
        );
        match rule {
            Rule::Empty => {
                unreachable!()
            }
            Rule::LineComment => {}
            Rule::CommentStart => {
                if self.comment_nest_count == 0 {
                    self.begin(Mode::Comment);
                }
                self.comment_nest_count += 1;
            }
            Rule::CommentEnd => {
                self.comment_nest_count -= 1;
                if self.comment_nest_count == 0 {
                    self.begin(Mode::Expr);
                }
            }
            Rule::CommentChar | Rule::ExprSpace | Rule::CommentAnyChar => {}
            Rule::ExprNewLine | Rule::CommentNewLine => {
                self.ctx_mut().line_no += 1;
            }
            Rule::LeftParen => {
                self.nest_count += 1;
                self.yield_id(TokenID::LeftParen);
            }
            Rule::RightParen => {
                self.nest_count -= 1;
                self.yield_id(TokenID::RightParen);
            }
            Rule::LeftBrack => {
                self.nest_count += 1;
                self.yield_id(TokenID::LeftBrack);
            }
            Rule::RightBrack => {
                self.nest_count -= 1;
                self.yield_id(TokenID::RightBrack);
            }
            Rule::Comma => {
                self.yield_id(TokenID::Comma);
            }
            Rule::Pipe => {
                self.yield_id(TokenID::Pipe);
            }
            Rule::RightBrace => {
                self.nest_count -= 1;
                self.curly_nest_count -= 1;
                if self.curly_nest_count >= 0 {
                    self.begin(Mode::Str);
                    self.yield_id(TokenID::RightParen);
                    let op_tab_idx = self.opers.lookup("++");
                    self.yield_optab(TokenID::AtomOper, arena.atom("++"), op_tab_idx);
                    self.clear();
                    self.accum();
                } else {
                    self.yield_term(TokenID::Error, arena.str("}"));
                }
            }
            Rule::Func => {
                self.nest_count += 1;
                self.ctx_mut().buffer.pop();
                let s = self.take_str()?;
                let op_tab_idx = self.opers.lookup(&s);
                let op_tab = self.opers.get(op_tab_idx);

                let atom = arena.atom(s);

                if op_tab.is_oper() {
                    let (has_empty, has_non_empty) =
                        [Fixity::Prefix, Fixity::Infix, Fixity::Postfix]
                            .iter()
                            .filter_map(|f| {
                                op_tab
                                    .get_op_def(*f)
                                    .map(|x| x.args.len() <= OperDef::required_arity(*f))
                            })
                            .fold((false, false), |(e, ne), is_empty| {
                                if is_empty { (true, ne) } else { (e, true) }
                            });

                    match (has_empty, has_non_empty) {
                        (false, false) => unreachable!(),
                        (true, false) => {
                            self.yield_optab(TokenID::AtomOper, atom, op_tab_idx);
                            self.yield_id(TokenID::LeftParen);
                        }
                        (false, true) => {
                            self.yield_optab(TokenID::FuncOper, atom, op_tab_idx);
                        }
                        (true, true) => bail!("arguments conflict in op defs for {:?}", atom),
                    }
                } else {
                    self.yield_optab(TokenID::Func, atom, op_tab_idx);
                }
            }
            Rule::Var => {
                let s = self.take_str()?;
                self.yield_term(TokenID::Var, arena.var(s));
            }
            Rule::Atom => {
                if self.ctx().buffer == b"." && self.nest_count == 0 {
                    self.yield_id(TokenID::Dot);
                    self.yield_id(TokenID::End);
                } else {
                    let s = self.take_str()?;
                    let op_tab_idx = self.opers.lookup(&s);
                    let op_tab = self.opers.get(op_tab_idx);
                    let atom = arena.atom(s);
                    if op_tab.is_oper() {
                        self.yield_optab(TokenID::AtomOper, atom, op_tab_idx);
                    } else {
                        self.yield_optab(TokenID::Atom, atom, op_tab_idx);
                    }
                }
            }

            Rule::DateEpoch => {
                let mut s = self.take_str()?;
                s.pop();
                s.drain(0..5);
                let s = s.trim();
                let d = parse_i64(s, 10)?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            Rule::Date => {
                self.begin(Mode::Date);
                self.clear();
                self.ctx_mut().buffer2.clear();
                self.date_format.clear();
            }
            Rule::Date1 => {
                self.begin(Mode::Time);
                self.date_format.push_str("%Y-%m-%d");
                self.extend_buffer2_with_buffer();
            }
            Rule::Date2 => {
                self.begin(Mode::Time);
                self.date_format.push_str("%m/%d/%Y");
                self.extend_buffer2_with_buffer();
            }
            Rule::Date3 => {
                self.begin(Mode::Time);
                self.date_format.push_str("%d-%b-%Y");
                self.extend_buffer2_with_buffer();
            }
            Rule::Time1 => {
                self.begin(Mode::Zone);
                self.date_format.push_str("T%H:%M:%S%.f");
                self.extend_buffer2_with_buffer();
            }
            Rule::Time2 => {
                self.begin(Mode::Zone);
                self.date_format.push_str("T%H:%M:%S");
                self.extend_buffer2_with_buffer();
                self.ctx_mut().buffer2.extend(b":00");
            }
            Rule::Time3 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %H:%M:%S%.f");
                self.extend_buffer2_with_buffer();
            }
            Rule::Time4 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %H:%M:%S");
                self.extend_buffer2_with_buffer();
                self.ctx_mut().buffer2.extend(b":00");
            }
            Rule::Time5 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %I:%M:%S%.f %p");
                self.extend_buffer2_with_buffer();
            }
            Rule::Time6 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %I:%M:%S %p");
                let ctx = self.ctx_mut();
                ctx.buffer2.extend(&ctx.buffer[..ctx.buffer.len() - 3]);
                ctx.buffer2.extend(b":00");
                ctx.buffer2.extend(&ctx.buffer[ctx.buffer.len() - 3..]);
            }
            Rule::Zone1 => {
                if self.ctx().mode == Mode::Time {
                    self.date_format.push_str(" %H:%M:%S");
                    self.ctx_mut().buffer2.extend(b" 00:00:00");
                }
                self.begin(Mode::Expr);
                self.date_format.push_str("%:z");
                self.ctx_mut().buffer2.extend(b"+00:00");
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            Rule::Zone2 => {
                if self.ctx().mode == Mode::Time {
                    self.date_format.push_str(" %H:%M:%S");
                    self.ctx_mut().buffer2.extend(b" 00:00:00");
                }
                self.begin(Mode::Expr);
                if self.ctx().buffer[0] == b' ' {
                    self.date_format.push(' ');
                }
                self.date_format.push_str("%:z");
                self.ctx_mut().buffer.pop();
                self.extend_buffer2_with_buffer();
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            Rule::TimeRightBrace => {
                self.begin(Mode::Expr);
                self.date_format.push_str(" %H:%M:%S%:z");
                self.ctx_mut().buffer2.extend(b" 00:00:00+00:00");
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            Rule::ZoneRightBrace => {
                self.begin(Mode::Expr);
                self.date_format.push_str("%:z");
                self.ctx_mut().buffer2.extend(b"+00:00");
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }

            Rule::Hex => {
                self.begin(Mode::Hex);
                self.ctx_mut().buffer2.clear();
            }
            Rule::HexSpace => {}
            Rule::HexNewLine => {
                self.ctx_mut().line_no += 1;
            }
            Rule::HexByte => {
                let s = str::from_utf8(&self.ctx().buffer)?;
                match u8::from_str_radix(s, 16) {
                    Ok(b) => {
                        self.ctx_mut().buffer2.push(b);
                    }
                    Err(_) => {
                        self.yield_term(TokenID::Error, arena.str(s));
                    }
                }
            }
            Rule::HexRightBrace => {
                self.ctx_mut().buffer.pop();
                let bytes = self.take_bytes2();
                self.yield_term(TokenID::Bin, arena.bin(bytes));
                self.begin(Mode::Expr);
            }
            Rule::Bin => {
                self.begin(Mode::Bin);
            }
            Rule::Text => {
                self.begin(Mode::Text);
            }
            Rule::BinSpace | Rule::TextSpace => {}
            Rule::BinNewLine | Rule::TextNewLine => {
                self.ctx_mut().line_no += 1;
            }
            r @ (Rule::BinCount | Rule::TextCount) => {
                let raw = str::from_utf8(&self.ctx().buffer)?;
                // The count may be terminated by a newline; account for it
                // before trimming it away.
                let ends_with_newline = raw.ends_with('\n');
                let mut s = String::from(raw.trim());
                if ends_with_newline {
                    self.ctx_mut().line_no += 1;
                }
                if s.ends_with(':') {
                    s.pop();
                }
                self.bin_count = s.parse()?;
                if self.bin_count > 0 {
                    if r == Rule::BinCount {
                        self.begin(Mode::BinCount);
                    } else {
                        self.begin(Mode::TextCount);
                    }
                    self.clear();
                    self.accum();
                }
            }
            r @ (Rule::BinCountAnyChar | Rule::TextCountAnyChar) => {
                self.bin_count -= 1;
                if self.bin_count == 0 {
                    self.extend_buffer2_with_buffer();
                    self.clear();
                    if r == Rule::BinCountAnyChar {
                        self.begin(Mode::Bin);
                    } else {
                        self.begin(Mode::Text);
                    }
                }
            }
            r @ (Rule::BinCountNLChar | Rule::TextCountNewLine) => {
                self.ctx_mut().line_no += 1;
                if self.ctx_mut().buffer[0] == b'\r' {
                    self.ctx_mut().buffer.remove(0);
                }
                self.bin_count -= 1;
                if self.bin_count == 0 {
                    self.extend_buffer2_with_buffer();
                    self.clear();
                    if r == Rule::BinCountNLChar {
                        self.begin(Mode::Bin);
                    } else {
                        self.begin(Mode::Text);
                    }
                }
            }
            r @ (Rule::BinRightBrace | Rule::TextRightBrace) => {
                if r == Rule::BinRightBrace {
                    let bytes = self.take_bytes2();
                    self.yield_term(TokenID::Bin, arena.bin(bytes));
                } else {
                    let s = self.take_str2()?;
                    self.yield_term(TokenID::Str, arena.str(s));
                }
                self.begin(Mode::Expr);
            }
            r @ (Rule::BinLabelStart | Rule::TextLabelStart) => {
                self.bin_label.clear();
                let len = self.ctx().buffer.len();
                if self.ctx_mut().buffer[len - 1] == b'\n' {
                    self.ctx_mut().line_no += 1;
                    self.bin_label.push(b'\n');
                    self.ctx_mut().buffer.pop();
                    let len = self.ctx().buffer.len();
                    if self.ctx_mut().buffer[len - 1] == b'\r' {
                        self.bin_label.insert(0, b'\r');
                        self.ctx_mut().buffer.pop();
                    }
                } else {
                    let len = self.ctx().buffer.len();
                    let b = self.ctx().buffer[len - 1];
                    self.bin_label.push(b);
                    self.ctx_mut().buffer.pop();
                }

                let buf = mem::take(&mut self.ctx_mut().buffer);
                self.bin_label.extend(buf);

                if r == Rule::BinLabelStart {
                    self.begin(Mode::BinLabel);
                } else {
                    self.begin(Mode::TextLabel);
                }
            }
            r @ (Rule::BinLabelEnd | Rule::TextLabelEnd) => {
                if self.ctx_mut().buffer[0] != b':' {
                    self.ctx_mut().line_no += 1;
                }
                if self.ctx().buffer == self.bin_label {
                    if r == Rule::BinLabelEnd {
                        self.begin(Mode::Bin);
                    } else {
                        self.begin(Mode::Text);
                    }
                } else {
                    if r == Rule::TextLabelEnd && self.ctx_mut().buffer[0] == b'\r' {
                        self.ctx_mut().buffer.remove(0);
                    }
                    self.extend_buffer2_with_buffer();
                }
            }
            r @ (Rule::BinLabelNLChar | Rule::TextLabelNewLine) => {
                self.ctx_mut().line_no += 1;
                if r == Rule::TextLabelNewLine && self.ctx_mut().buffer[0] == b'\r' {
                    self.ctx_mut().buffer.remove(0);
                }
                self.extend_buffer2_with_buffer();
            }
            Rule::BinLabelAnyChar | Rule::TextLabelAnyChar => {
                self.extend_buffer2_with_buffer();
            }
            Rule::LeftBrace => {
                self.begin(Mode::Script);
                self.clear();
                self.accum();
            }
            Rule::ScriptNotBraces => {}
            Rule::ScriptLeftBrace => {
                self.script_curly_nest_count += 1;
            }
            Rule::ScriptRightBrace => {
                if self.script_curly_nest_count != 0 {
                    self.script_curly_nest_count -= 1;
                } else {
                    self.ctx_mut().buffer.pop();
                    let s = self.take_str()?;
                    self.yield_term(TokenID::Str, arena.str(s));
                    self.begin(Mode::Expr);
                }
            }
            Rule::ScriptNewLine => {
                self.ctx_mut().line_no += 1;
            }
            Rule::HexConst => {
                self.ctx_mut().buffer.drain(0..2);
                let s = self.take_str()?;
                let val = parse_i64(s.as_str(), 16)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::BaseConst => {
                let s = self.take_str()?;
                let (base_str, digits) =
                    s.split_once('\'').ok_or(anyhow!("missing ' separator"))?;
                let base: u32 = base_str.parse().map_err(|_| anyhow!("invalid base"))?;
                let val = parse_i64(digits, base)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharHex => {
                let mut s = self.take_str()?;
                s.drain(0..4);
                let val = parse_i64(s.as_str(), 16)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharOct => {
                let mut s = self.take_str()?;
                s.drain(0..3);
                let val = parse_i64(s.as_str(), 8)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharNewLine1 | Rule::CharNewLine2 | Rule::CharNewLine4 => {
                self.ctx_mut().line_no += 1;
                self.yield_term(TokenID::Int, arena.int('\n' as i64));
            }
            Rule::CharNotBackslash => {
                let mut s = self.take_str()?;
                s.drain(0..2);
                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharCtrl => {
                let mut s = self.take_str()?;
                s.drain(0..4);
                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64 - '@' as i64;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharDel1 | Rule::CharDel2 => {
                self.yield_term(TokenID::Int, arena.int('\x7F' as i64));
            }
            Rule::CharEsc => {
                self.yield_term(TokenID::Int, arena.int('\x1B' as i64));
            }
            Rule::CharBell => {
                self.yield_term(TokenID::Int, arena.int('\u{0007}' as i64));
            }
            Rule::CharBackspace => {
                self.yield_term(TokenID::Int, arena.int('\u{0008}' as i64));
            }
            Rule::CharFormFeed => {
                self.yield_term(TokenID::Int, arena.int('\u{000C}' as i64));
            }
            Rule::CharNewLine3 => {
                self.yield_term(TokenID::Int, arena.int('\n' as i64));
            }
            Rule::CharCarriageReturn => {
                self.yield_term(TokenID::Int, arena.int('\r' as i64));
            }
            Rule::CharTab => {
                self.yield_term(TokenID::Int, arena.int('\t' as i64));
            }
            Rule::CharVerticalTab => {
                self.yield_term(TokenID::Int, arena.int('\u{000B}' as i64));
            }
            Rule::CharAny => {
                let mut s = self.take_str()?;
                s.drain(0..3);
                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::OctConst => {
                let s = self.take_str()?;
                let val = parse_i64(s.as_str(), 8)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::DecConst => {
                let s = self.take_str()?;
                let val = parse_i64(s.as_str(), 10)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::FPConst => {
                let s = self.take_str()?;
                let val: f64 = s.parse()?;
                self.yield_term(TokenID::Real, arena.real(val));
            }
            Rule::DoubleQuote => {
                self.begin(Mode::Str);
                self.clear();
                self.accum();
            }
            Rule::SingleQuote => {
                self.begin(Mode::Atom);
                self.clear();
                self.accum();
            }
            Rule::StrAtomCharHex => {
                let len = self.ctx().buffer.len();
                let b: u8 = parse_i64(str::from_utf8(&self.ctx_mut().buffer[len - 2..])?, 16)?
                    .try_into()?;
                self.ctx_mut().buffer.truncate(len - 4);
                self.ctx_mut().buffer.push(b);
            }
            Rule::StrAtomCharOct => {
                let slash_pos = self.ctx().buffer.iter().rposition(|&b| b == b'\\').unwrap();
                let b: u8 = parse_i64(str::from_utf8(&self.ctx().buffer[slash_pos + 1..])?, 8)?
                    .try_into()?;
                self.ctx_mut().buffer.truncate(slash_pos);
                self.ctx_mut().buffer.push(b);
            }
            Rule::StrAtomCharCtrl => {
                let len = self.ctx().buffer.len();
                let b = self.ctx_mut().buffer[len - 1] - b'@';
                self.ctx_mut().buffer.truncate(len - 3);
                self.ctx_mut().buffer.push(b);
            }
            Rule::StrAtomCharDel1 => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x7F');
            }
            Rule::StrAtomCharDel2 => {
                let idx = self.ctx().buffer.len() - 3;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x7F');
            }
            Rule::StrAtomCharEsc => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x1B');
            }
            Rule::StrAtomCharBell => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x07');
            }
            Rule::StrAtomCharBackspace => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x08');
            }
            Rule::StrAtomCharFormFeed => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x0C');
            }
            Rule::StrAtomCharNewLine => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\n');
            }
            Rule::StrAtomCharCarriageReturn => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\r');
            }
            Rule::StrAtomCharTab => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\t');
            }
            Rule::StrAtomVerticalTab => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x0B');
            }
            Rule::StrAtomCharSkipNewLine => {
                self.ctx_mut().line_no += 1;
                self.ctx_mut().buffer.pop();
                let idx = self.ctx().buffer.len() - 1;
                if self.ctx_mut().buffer[idx] == b'\r' {
                    self.ctx_mut().buffer.pop();
                }
                self.ctx_mut().buffer.pop();
            }
            Rule::StrAtomCharAny | Rule::StrAtomCharBackslash => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.remove(idx);
            }
            Rule::StrChar | Rule::AtomChar | Rule::StrAtomCarriageReturn => {}
            Rule::StrDoubleQuote => {
                self.begin(Mode::Expr);
                self.ctx_mut().buffer.pop();
                let s = self.take_str()?;
                self.yield_term(TokenID::Str, arena.str(s));
            }
            Rule::AtomSingleQuote => {
                self.begin(Mode::Expr);
                self.ctx_mut().buffer.pop();
                let s = self.take_str()?;
                self.yield_term(TokenID::Atom, arena.atom(s));
            }
            Rule::AtomLeftParen => {
                self.begin(Mode::Expr);
                self.nest_count += 1;
                let mut s = self.take_str()?;
                s.truncate(s.len() - 2);
                self.yield_term(TokenID::Func, arena.atom(s));
            }
            Rule::AtomLeftBrace => {}
            Rule::StrLeftBrace => {
                self.begin(Mode::Expr);
                self.nest_count += 1;
                self.curly_nest_count += 1;
                let mut s = self.take_str()?;
                s.pop();
                self.yield_term(TokenID::Str, arena.str(s));
                let op_tab_idx = self.opers.lookup("++");
                self.yield_optab(TokenID::AtomOper, arena.atom("++"), op_tab_idx);
                self.yield_id(TokenID::LeftParen);
            }
            Rule::StrAtomNewLine => {
                self.ctx_mut().line_no += 1;
            }
            Rule::Error => {
                let s = self.take_str()?;
                self.yield_term(TokenID::Error, arena.str(s));
            }
            Rule::End => {
                if self.ctx().mode == Mode::Expr {
                    self.yield_id(TokenID::End);
                } else {
                    self.yield_term(TokenID::Error, arena.str("<END>"));
                }
            }
        }

        log::trace!(
            "ACTION end: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
            self.ctx().mode,
            rule,
            str::from_utf8(&self.ctx().buffer),
            str::from_utf8(&self.ctx().buffer2),
            str::from_utf8(&self.bin_label),
            self.ctx().accum_flag,
        );

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn lex(arena: &mut Arena, s: &str) -> Result<Vec<TermToken>> {
        let mut lx = TermLexer::try_new(s.bytes().fuse(), Some(OperDefs::new()))?;
        Ok(lx.try_collect(arena)?)
    }

    #[test]
    fn test_dates() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut arena = Arena::new();
        const DATES: &[(&str, u8)] = &[
            ("date{-5381856000000}", 0),
            ("date{-5381830320000}", 1),
            ("date{-5381830311000}", 2),
            ("date{-5381830310999}", 3),
            ("date{1799-06-16}", 0),
            ("date{1799-06-16Z}", 0),
            ("date{1799-06-16 Z}", 0),
            ("date{1799-06-16-00:00}", 0),
            ("date{1799-06-16 -00:00}", 0),
            ("date{1799-06-16T07:08}", 1),
            ("date{1799-06-16T07:08:09}", 2),
            ("date{1799-06-16T07:08:09Z}", 2),
            ("date{1799-06-16T07:08:09.001Z}", 3),
            ("date{1799-06-16T07:08:09 Z}", 2),
            ("date{1799-06-16T07:08:09.001 Z}", 3),
            ("date{1799-06-16T07:08:09+00:00}", 2),
            ("date{1799-06-16T07:08:09.001+00:00}", 3),
            ("date{1799-06-16T07:08:09 +00:00}", 2),
            ("date{1799-06-16T07:08:09.001 +00:00}", 3),
            ("date{1799-06-16T07:08:09Z}", 2),
            ("date{1799-06-16T07:08:09.001Z}", 3),
            ("date{1799-06-16 07:08:09 Z}", 2),
            ("date{1799-06-16T07:08:09.001 Z}", 3),
            ("date{1799-06-16 07:08:09+00:00}", 2),
            ("date{1799-06-16T07:08:09.001+00:00}", 3),
            ("date{1799-06-16 07:08:09 +00:00}", 2),
            ("date{1799-06-16 07:08:09.001 +00:00}", 3),
            ("date{1799-06-16T07:08Z}", 1),
            ("date{1799-06-16T07:08 Z }", 1),
            ("date{ 1799-06-16T07:08+00:00}", 1),
            ("date{ 1799-06-16T07:08 +00:00 }", 1),
            ("date{06/16/1799Z}", 0),
            ("date{06/16/1799 Z}", 0),
            ("date{06/16/1799+00:00}", 0),
            ("date{06/16/1799 +00:00}", 0),
            ("date{06/16/1799 07:08Z}", 1),
            ("date{06/16/1799 07:08:09Z}", 2),
            ("date{06/16/1799 07:08:09.001Z}", 3),
            ("date{06/16/1799 07:08 Z}", 1),
            ("date{06/16/1799 07:08:09 Z}", 2),
            ("date{06/16/1799 07:08:09.001 Z}", 3),
            ("date{06/16/1799 07:08+00:00}", 1),
            ("date{06/16/1799 07:08:09+00:00}", 2),
            ("date{06/16/1799 07:08:09.001+00:00}", 3),
            ("date{06/16/1799 07:08 +00:00}", 1),
            ("date{06/16/1799 07:08:09 +00:00}", 2),
            ("date{06/16/1799 07:08:09.001 +00:00}", 3),
            ("date{16-Jun-1799Z}", 0),
            ("date{16-jun-1799 Z}", 0),
            ("date{16-JUN-1799+00:00}", 0),
            ("date{16-Jun-1799 +00:00}", 0),
            ("date{16-Jun-1799 07:08Z}", 1),
            ("date{16-JUN-1799 07:08:09Z}", 2),
            ("date{16-Jun-1799 07:08:09.001Z}", 3),
            ("date{16-Jun-1799 07:08 Z}", 1),
            ("date{16-jun-1799 07:08:09 Z}", 2),
            ("date{16-Jun-1799 07:08:09.001 Z}", 3),
            ("date{16-Jun-1799 07:08+00:00}", 1),
            ("date{16-Jun-1799 07:08:09+00:00}", 2),
            ("date{16-Jun-1799 07:08:09.001+00:00}", 3),
            ("date{16-Jun-1799 07:08 +00:00}", 1),
            ("date{16-Jun-1799 07:08:09 +00:00}", 2),
            ("date{16-Jun-1799 07:08:09.001 +00:00}", 3),
        ];
        for (s, k) in DATES {
            let mut ts = lex(&mut arena, s).unwrap();
            let tok = ts.remove(0);
            assert_eq!(tok.token_id, TokenID::Date);
            let term = Term::try_from(tok.value).unwrap();
            let d = term.unpack_date(&arena).unwrap();
            assert_eq!(
                d,
                match k {
                    0 => -5381856000000,
                    1 => -5381830320000,
                    2 => -5381830311000,
                    3 => -5381830310999,
                    _ => unreachable!(),
                }
            );
        }
    }

    #[test]
    fn test_atoms() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "\na+foo-x '^&%^&%^&%''abc' 'AAA'").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 9);
        assert!(ts.iter().take(ts.len() - 1).all(|t| {
            t.line_no == 2
                && matches!(
                    Term::try_from(t.value.clone())
                        .unwrap()
                        .view(&arena)
                        .unwrap(),
                    View::Atom(_)
                )
        }));
    }

    #[test]
    fn test_bin() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "% single line comment\nbin{3:\x00\x01\x02 eob:\x00\x01:aaa\x02:eob eob\n\x00\neob eob\r\n\x00\r\neob\r\n}\r\nhex{ 0203 0405 FE }").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 3);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Bin(_)
        ));
        match Term::try_from(ts[0].value.clone())
            .unwrap()
            .view(&arena)
            .unwrap()
        {
            View::Bin(bytes) => assert!(bytes == &[0, 1, 2, 0, 1, 58, 97, 97, 97, 2, 0, 0]),
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_text() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "/* single /* line */ comment */\ntext{3:abc eob:de:aaa:eob eob\n0\neob eob\r\n1\r\neob\r\n}\r\n").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 2);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Str(_)
        ));
        match Term::try_from(ts[0].value.clone())
            .unwrap()
            .view(&arena)
            .unwrap()
        {
            View::Str(s) => assert!(s == "abcde:aaa01"),
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_texts() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "/* single [ ( { /* line */ comment */\n\"hello\" {hello} text{5:hello} text{e:hello:e} text{e:h:e e:e:e 2:ll e:o:e} text{\ne\nhello\ne}").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 7);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Str(_)
        ));
        assert!(ts.iter().take(ts.len() - 1).all(|t| {
            match Term::try_from(t.value.clone())
                .unwrap()
                .view(&arena)
                .unwrap()
            {
                View::Str(s) => s == "hello",
                _ => false,
            }
        }));
    }

    #[test]
    fn test_integers() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "[2'01010001111, 10'123, 36'AZ]").unwrap();
        assert!(ts.len() == 8);
        assert!(matches!(ts[1].token_id, TokenID::Int));
    }

    #[test]
    fn lex_string_subs() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let ts = lex(arena, "\"aaa{1 + 2}bbb{3 * 4}ccc\"").unwrap();
        assert_eq!(ts.len(), 18);
        let t0: Term = ts[0].value.clone().try_into().unwrap();
        let t1: Term = ts[8].value.clone().try_into().unwrap();
        let t2: Term = ts[16].value.clone().try_into().unwrap();
        assert_eq!(t0.unpack_str(arena).unwrap(), "aaa");
        assert_eq!(t1.unpack_str(arena).unwrap(), "bbb");
        assert_eq!(t2.unpack_str(arena).unwrap(), "ccc");
    }
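
    // A small, hand-checked sanity test for the two parsing helpers above
    // (`parse_i64` and `parse_date_to_epoch`); the expected values assume the
    // current behaviour that an empty digit string parses to 0.
    #[test]
    fn test_parse_helpers() {
        assert_eq!(parse_i64("", 10).unwrap(), 0);
        assert_eq!(parse_i64("ff", 16).unwrap(), 255);
        assert_eq!(parse_i64("777", 8).unwrap(), 511);
        assert!(parse_i64("2", 2).is_err());
        assert_eq!(parse_date_to_epoch("1970-01-01T00:00:00Z", None).unwrap(), 0);
        assert_eq!(
            parse_date_to_epoch("1970-01-01 00:00:01+00:00", Some("%Y-%m-%d %H:%M:%S%:z")).unwrap(),
            1000
        );
    }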
}