1use crate::{TermToken, TokenID, Value};
31use lexer_data::{LexData, Mode, Rule};
32use parlex::{Lexer, LexerData, LexerDriver, LexerStats, ParlexError, Span};
33use std::marker::PhantomData;
34use try_next::TryNextWithContext;
35
36use arena_terms::{Arena, Fixity, OperDef, Term};
37use chrono::{DateTime, FixedOffset, Utc};
38use smartstring::alias::String;
39
/// Lexer tables and rule/mode enums generated by the build script into
/// `$OUT_DIR/lexer_data.rs` (provides `LexData`, `Mode`, and `Rule`).
pub mod lexer_data {
    include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));
}
53
54fn parse_date_to_epoch(s: &str, fmt: Option<&str>) -> Result<i64, ParlexError> {
74 let dt_fixed: DateTime<FixedOffset> = match fmt {
75 None => DateTime::parse_from_rfc3339(s).map_err(|e| ParlexError::from_err(e, None))?,
76 Some(layout) => {
77 DateTime::parse_from_str(s, layout).map_err(|e| ParlexError::from_err(e, None))?
78 }
79 };
80 let dt_utc = dt_fixed.with_timezone(&Utc);
81 Ok(dt_utc.timestamp_millis())
82}
83
/// Parses `s` as a signed 64-bit integer in the given `base`.
///
/// An empty string yields `0` — callers strip fixed prefixes (e.g. `0x`)
/// before calling, and an all-prefix lexeme must still produce a value.
fn parse_i64(s: &str, base: u32) -> Result<i64, std::num::ParseIntError> {
    match s {
        "" => Ok(0),
        digits => i64::from_str_radix(digits, base),
    }
}
107
/// Mutable state for the term lexer's mode-driven scanning.
///
/// The driver accumulates bytes into the lexer's primary buffer and into
/// `buffer2` (for multi-mode constructs such as dates, hex/bin/text chunks),
/// and tracks the nesting counters the grammar needs.
pub struct TermLexerDriver<I> {
    // Ties the driver to its input item source without storing it.
    _marker: PhantomData<I>,

    /// Secondary accumulation buffer used across mode switches
    /// (date pieces, decoded hex bytes, bin/text chunk contents).
    pub buffer2: Vec<u8>,

    /// Span covering everything accumulated into `buffer2`.
    pub span2: Span,

    // Paren/bracket nesting depth; a `.` at depth 0 terminates a term.
    nest_count: isize,

    // Depth of nested `/* ... */` comments; mode switches at 0.
    comment_nest_count: isize,

    // Depth of `{ ... }` string-interpolation groups opened from Str mode.
    curly_nest_count: isize,

    // Depth of braces inside a `{ ... }` script literal.
    script_curly_nest_count: isize,

    // Remaining bytes to consume in a counted bin/text chunk (`N:<bytes>`).
    bin_count: isize,

    // Terminator byte sequence (delimiter + label) for labeled bin/text chunks.
    bin_label: Vec<u8>,

    // chrono format string, built incrementally as date/time/zone parts match.
    date_format: String,
}
166
167#[inline]
168fn yield_id<I>(lexer: &mut Lexer<I, TermLexerDriver<I>, Arena>, token_id: TokenID)
169where
170 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
171{
172 lexer.yield_token(TermToken {
173 token_id,
174 value: Value::None,
175 span: Some(lexer.span()),
176 op_tab_index: None,
177 });
178}
179
180#[inline]
181fn yield_term<I>(
182 lexer: &mut Lexer<I, TermLexerDriver<I>, Arena>,
183 token_id: TokenID,
184 term: Term,
185 span: Span,
186) where
187 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
188{
189 lexer.yield_token(TermToken {
190 token_id,
191 value: Value::Term(term),
192 span: Some(span),
193 op_tab_index: None,
194 });
195}
196
197#[inline]
198fn yield_optab<I>(
199 lexer: &mut Lexer<I, TermLexerDriver<I>, Arena>,
200 token_id: TokenID,
201 term: Term,
202 op_tab_index: Option<usize>,
203 span: Span,
204) where
205 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
206{
207 lexer.yield_token(TermToken {
208 token_id,
209 value: Value::Term(term),
210 span: Some(span),
211 op_tab_index,
212 });
213}
214
215#[inline]
216fn take_bytes<I>(lexer: &mut Lexer<I, TermLexerDriver<I>, Arena>) -> Vec<u8>
217where
218 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
219{
220 lexer.accum_flag = false;
221 ::core::mem::take(&mut lexer.buffer)
222}
223
224#[inline]
225fn take_str<I>(lexer: &mut Lexer<I, TermLexerDriver<I>, Arena>) -> Result<String, ParlexError>
226where
227 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
228{
229 lexer.accum_flag = false;
230 let bytes = take_bytes(lexer);
231 let s = ::std::string::String::from_utf8(bytes)
232 .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
233 Ok(String::from(s))
234}
235
236impl<I> TermLexerDriver<I> {
237 #[inline]
238 fn take_bytes2(&mut self, lexer: &mut Lexer<I, Self, Arena>) -> Vec<u8>
239 where
240 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
241 {
242 lexer.accum_flag = false;
243 std::mem::take(&mut self.buffer2)
244 }
245
246 #[inline]
247 fn take_str2(&mut self, lexer: &mut Lexer<I, Self, Arena>) -> Result<String, ParlexError>
248 where
249 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
250 {
251 lexer.accum_flag = false;
252 let bytes = std::mem::take(&mut self.buffer2);
253 let s = std::string::String::from_utf8(bytes)
254 .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
255 Ok(String::from(s))
256 }
257}
258
impl<I> LexerDriver for TermLexerDriver<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Automaton tables generated at build time (see `lexer_data`).
    type LexerData = LexData;

    /// Tokens produced by this driver.
    type Token = TermToken;

    /// Concrete lexer type this driver plugs into.
    type Lexer = Lexer<I, Self, Self::Context>;

    /// Terms are allocated in an `Arena` shared with the parser.
    type Context = Arena;

    /// Performs the side effects for the rule just matched: switching lexer
    /// modes, maintaining nesting counters, splicing bytes between
    /// `lexer.buffer` and `self.buffer2`, and yielding tokens.
    ///
    /// Statement order within each arm is significant (buffers and spans are
    /// mutated in place before tokens are yielded).
    fn action(
        &mut self,
        lexer: &mut Self::Lexer,
        arena: &mut Self::Context,
        rule: <Self::LexerData as LexerData>::LexerRule,
    ) -> Result<(), ParlexError> {
        log::trace!(
            "ACTION begin: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
            lexer.mode(),
            rule,
            str::from_utf8(&lexer.buffer),
            str::from_utf8(&self.buffer2),
            str::from_utf8(&self.bin_label),
            lexer.accum_flag,
        );
        match rule {
            // The generated table never dispatches the placeholder rule.
            Rule::Empty => {
                unreachable!()
            }
            Rule::LineComment => {}
            // Block comments nest; only the outermost transition changes mode.
            Rule::CommentStart => {
                if self.comment_nest_count == 0 {
                    lexer.begin(Mode::Comment);
                }
                self.comment_nest_count += 1;
            }
            Rule::CommentEnd => {
                self.comment_nest_count -= 1;
                if self.comment_nest_count == 0 {
                    lexer.begin(Mode::Expr);
                }
            }
            Rule::CommentChar | Rule::ExprSpace | Rule::CommentAnyChar => {}
            Rule::ExprNewLine | Rule::CommentNewLine => {
            }
            // Plain grouping tokens; nest_count gates the `.` end-of-term rule.
            Rule::LeftParen => {
                self.nest_count += 1;
                yield_id(lexer, TokenID::LeftParen);
            }
            Rule::RightParen => {
                self.nest_count -= 1;
                yield_id(lexer, TokenID::RightParen);
            }
            Rule::LeftBrack => {
                self.nest_count += 1;
                yield_id(lexer, TokenID::LeftBrack);
            }
            Rule::RightBrack => {
                self.nest_count -= 1;
                yield_id(lexer, TokenID::RightBrack);
            }
            Rule::Comma => {
                yield_id(lexer, TokenID::Comma);
            }
            Rule::Pipe => {
                yield_id(lexer, TokenID::Pipe);
            }
            // `}` in Expr mode closes a string-interpolation group opened by
            // `StrLeftBrace`: emit `)` and a `++` operator, then resume Str
            // mode accumulating the rest of the string literal.
            Rule::RightBrace => {
                self.nest_count -= 1;
                self.curly_nest_count -= 1;
                if self.curly_nest_count >= 0 {
                    lexer.begin(Mode::Str);
                    yield_id(lexer, TokenID::RightParen);
                    let op_tab_idx = arena.lookup_oper("++");
                    yield_optab(
                        lexer,
                        TokenID::AtomOper,
                        arena.atom("++"),
                        op_tab_idx,
                        lexer.span(),
                    );
                    lexer.clear();
                    lexer.accum();
                } else {
                    // A `}` with no matching interpolation opener.
                    return Err(ParlexError {
                        message: format!("error on lexeme `}}`"),
                        span: Some(lexer.span()),
                    });
                }
            }
            // Identifier immediately followed by `(`: either a function call
            // or an operator used in call syntax.
            Rule::Func => {
                self.nest_count += 1;
                // Drop the trailing `(` from the lexeme.
                lexer.buffer.pop();
                let s = take_str(lexer)?;
                let atom = arena.atom(&s);
                let op_tab_idx = arena.lookup_oper(&s);
                let op_tab = arena.get_oper(op_tab_idx);
                if op_tab.is_oper() {
                    // Classify the operator's defs: does any fixity take no
                    // extra args (is_empty) and does any take extra args?
                    let (has_empty, has_non_empty) =
                        [Fixity::Prefix, Fixity::Infix, Fixity::Postfix]
                            .iter()
                            .filter_map(|f| {
                                op_tab
                                    .get_op_def(*f)
                                    .map(|x| x.args.len() <= OperDef::required_arity(*f))
                            })
                            .fold((false, false), |(e, ne), is_empty| {
                                if is_empty { (true, ne) } else { (e, true) }
                            });

                    match (has_empty, has_non_empty) {
                        // filter_map produced at least one def, so one flag is set.
                        (false, false) => unreachable!(),
                        (true, false) => {
                            yield_optab(lexer, TokenID::AtomOper, atom, op_tab_idx, lexer.span());
                            yield_id(lexer, TokenID::LeftParen);
                        }
                        (false, true) => {
                            yield_optab(lexer, TokenID::FuncOper, atom, op_tab_idx, lexer.span());
                        }
                        (true, true) => {
                            // Ambiguous: defs disagree on whether call args are allowed.
                            return Err(ParlexError {
                                message: format!("arguments conflict in op defs for {:?}", atom),
                                span: Some(lexer.span()),
                            });
                        }
                    }
                } else {
                    yield_optab(lexer, TokenID::Func, atom, op_tab_idx, lexer.span());
                }
            }
            Rule::Var => {
                let s = take_str(lexer)?;
                yield_term(lexer, TokenID::Var, arena.var(s), lexer.span());
            }
            Rule::Atom => {
                // A bare `.` at nesting depth 0 terminates the current term.
                if lexer.buffer == b"." && self.nest_count == 0 {
                    yield_id(lexer, TokenID::Dot);
                    yield_id(lexer, TokenID::End);
                } else {
                    let s = take_str(lexer)?;
                    let atom = arena.atom(&s);
                    let op_tab_idx = arena.lookup_oper(&s);
                    let op_tab = arena.get_oper(op_tab_idx);
                    if op_tab.is_oper() {
                        yield_optab(lexer, TokenID::AtomOper, atom, op_tab_idx, lexer.span());
                    } else {
                        yield_optab(lexer, TokenID::Atom, atom, op_tab_idx, lexer.span());
                    }
                }
            }

            // `date{<millis>}`: strip `date{` (5 bytes) and trailing `}`,
            // parse the raw epoch-milliseconds payload.
            Rule::DateEpoch => {
                let mut s = take_str(lexer)?;
                s.pop();
                s.drain(0..5);
                let s = s.trim();
                let d =
                    parse_i64(s, 10).map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Date, arena.date(d), lexer.span());
            }
            // `date{` with a textual date: collect pieces into buffer2 while
            // building the matching chrono format string incrementally.
            Rule::Date => {
                lexer.begin(Mode::Date);
                lexer.clear();
                self.buffer2.clear();
                self.date_format.clear();
                self.span2 = lexer.span();
            }
            Rule::Date1 => {
                lexer.begin(Mode::Time);
                self.date_format.push_str("%Y-%m-%d");
                self.buffer2.extend(&lexer.buffer);
                self.span2.merge(lexer.span_ref());
            }
            Rule::Date2 => {
                lexer.begin(Mode::Time);
                self.date_format.push_str("%m/%d/%Y");
                self.buffer2.extend(&lexer.buffer);
                self.span2.merge(lexer.span_ref());
            }
            Rule::Date3 => {
                lexer.begin(Mode::Time);
                self.date_format.push_str("%d-%b-%Y");
                self.buffer2.extend(&lexer.buffer);
                self.span2.merge(lexer.span_ref());
            }
            Rule::Time1 => {
                lexer.begin(Mode::Zone);
                self.date_format.push_str("T%H:%M:%S%.f");
                self.buffer2.extend(&lexer.buffer);
                self.span2.merge(lexer.span_ref());
            }
            // HH:MM without seconds — synthesize `:00` so the format matches.
            Rule::Time2 => {
                lexer.begin(Mode::Zone);
                self.date_format.push_str("T%H:%M:%S");
                self.buffer2.extend(&lexer.buffer);
                self.buffer2.extend(b":00");
                self.span2.merge(lexer.span_ref());
            }
            Rule::Time3 => {
                lexer.begin(Mode::Zone);
                self.date_format.push_str(" %H:%M:%S%.f");
                self.buffer2.extend(&lexer.buffer);
                self.span2.merge(lexer.span_ref());
            }
            Rule::Time4 => {
                lexer.begin(Mode::Zone);
                self.date_format.push_str(" %H:%M:%S");
                self.buffer2.extend(&lexer.buffer);
                self.buffer2.extend(b":00");
                self.span2.merge(lexer.span_ref());
            }
            Rule::Time5 => {
                lexer.begin(Mode::Zone);
                self.date_format.push_str(" %I:%M:%S%.f %p");
                self.buffer2.extend(&lexer.buffer);
                self.span2.merge(lexer.span_ref());
            }
            // 12-hour time without seconds: splice `:00` in front of the last
            // 3 bytes (presumably the " AM"/" PM" suffix — TODO confirm).
            Rule::Time6 => {
                lexer.begin(Mode::Zone);
                self.date_format.push_str(" %I:%M:%S %p");
                self.buffer2.extend(&lexer.buffer[..lexer.buffer.len() - 3]);
                self.buffer2.extend(b":00");
                self.buffer2.extend(&lexer.buffer[lexer.buffer.len() - 3..]);
                self.span2.merge(lexer.span_ref());
            }
            // Zone rules finalize the date: fill in defaults for any missing
            // time/zone part, then parse buffer2 with the accumulated format.
            Rule::Zone1 => {
                if lexer.mode() == Mode::Time {
                    // Date had no time part; default to midnight.
                    self.date_format.push_str(" %H:%M:%S");
                    self.buffer2.extend(b" 00:00:00");
                }
                lexer.begin(Mode::Expr);
                self.date_format.push_str("%:z");
                self.buffer2.extend(b"+00:00");
                let s = self.take_str2(lexer)?;
                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
                self.span2.merge(lexer.span_ref());
                yield_term(lexer, TokenID::Date, arena.date(d), self.span2);
            }
            Rule::Zone2 => {
                self.span2.merge(lexer.span_ref());
                if lexer.mode() == Mode::Time {
                    self.date_format.push_str(" %H:%M:%S");
                    self.buffer2.extend(b" 00:00:00");
                }
                lexer.begin(Mode::Expr);
                // Preserve the optional space before the explicit offset.
                if lexer.buffer[0] == b' ' {
                    self.date_format.push(' ');
                }
                self.date_format.push_str("%:z");
                // Drop the trailing terminator byte of the zone lexeme.
                lexer.buffer.pop();
                self.buffer2.extend(&lexer.buffer);
                let s = self.take_str2(lexer)?;
                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
                yield_term(lexer, TokenID::Date, arena.date(d), self.span2);
            }
            // `}` right after the date part: default time and zone.
            Rule::TimeRightBrace => {
                self.span2.merge(lexer.span_ref());
                lexer.begin(Mode::Expr);
                self.date_format.push_str(" %H:%M:%S%:z");
                self.buffer2.extend(b" 00:00:00+00:00");
                let s = self.take_str2(lexer)?;
                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
                yield_term(lexer, TokenID::Date, arena.date(d), self.span2);
            }
            // `}` right after the time part: default the zone to UTC.
            Rule::ZoneRightBrace => {
                self.span2.merge(lexer.span_ref());
                lexer.begin(Mode::Expr);
                self.date_format.push_str("%:z");
                self.buffer2.extend(b"+00:00");
                let s = self.take_str2(lexer)?;
                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
                yield_term(lexer, TokenID::Date, arena.date(d), self.span2);
            }

            // `hex{ ... }`: decode two-digit hex bytes into buffer2.
            Rule::Hex => {
                lexer.begin(Mode::Hex);
                self.buffer2.clear();
                self.span2 = lexer.span();
            }
            Rule::HexSpace => {
                self.span2.merge(lexer.span_ref());
            }
            Rule::HexNewLine => {
                self.span2.merge(lexer.span_ref());
            }
            Rule::HexByte => {
                let s = str::from_utf8(&lexer.buffer)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                self.span2.merge(lexer.span_ref());
                let b = u8::from_str_radix(s, 16)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                self.buffer2.push(b);
            }
            Rule::HexRightBrace => {
                // Drop the `}` and emit the decoded bytes as a Bin term.
                lexer.buffer.pop();
                let bytes = self.take_bytes2(lexer);
                self.span2.merge(lexer.span_ref());
                yield_term(lexer, TokenID::Bin, arena.bin(bytes), self.span2);
                lexer.begin(Mode::Expr);
            }
            // `bin{...}` / `text{...}` chunked literals. Chunks are either
            // counted (`N:<N bytes>`) or labeled (`lbl:<bytes>:lbl`).
            Rule::Bin => {
                lexer.begin(Mode::Bin);
                self.span2 = lexer.span();
            }
            Rule::Text => {
                lexer.begin(Mode::Text);
                self.span2 = lexer.span();
            }
            Rule::BinSpace | Rule::TextSpace => {
                self.span2.merge(lexer.span_ref());
            }
            Rule::BinNewLine | Rule::TextNewLine => {
                self.span2.merge(lexer.span_ref());
            }
            r @ (Rule::BinCount | Rule::TextCount) => {
                let s = str::from_utf8(&lexer.buffer)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                let mut s = String::from(s.trim());
                self.span2.merge(lexer.span_ref());
                // NOTE(review): empty branch — after trim() a trailing "\n"
                // can't remain; looks like a leftover no-op. Confirm and remove.
                if &s[s.len() - 1..] == "\n" {
                }
                if &s[s.len() - 1..] == ":" {
                    s.pop();
                }
                self.bin_count = s
                    .parse()
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                if self.bin_count > 0 {
                    if r == Rule::BinCount {
                        lexer.begin(Mode::BinCount);
                    } else {
                        lexer.begin(Mode::TextCount);
                    }
                    lexer.clear();
                    lexer.accum();
                }
            }
            // Consume exactly bin_count bytes; on the last one, flush into
            // buffer2 and return to the chunk-list mode.
            r @ (Rule::BinCountAnyChar | Rule::TextCountAnyChar) => {
                self.span2.merge(lexer.span_ref());
                self.bin_count -= 1;
                if self.bin_count == 0 {
                    self.buffer2.extend(&lexer.buffer);
                    lexer.clear();
                    if r == Rule::BinCountAnyChar {
                        lexer.begin(Mode::Bin);
                    } else {
                        lexer.begin(Mode::Text);
                    }
                }
            }
            r @ (Rule::BinCountNLChar | Rule::TextCountNewLine) => {
                self.span2.merge(lexer.span_ref());
                // Normalize CRLF: drop the leading '\r' of the newline lexeme.
                if lexer.buffer[0] == b'\r' {
                    lexer.buffer.remove(0);
                }
                self.bin_count -= 1;
                if self.bin_count == 0 {
                    self.buffer2.extend(&lexer.buffer);
                    lexer.clear();
                    if r == Rule::BinCountNLChar {
                        lexer.begin(Mode::Bin);
                    } else {
                        lexer.begin(Mode::Text);
                    }
                }
            }
            // `}` ends the literal: bin yields raw bytes, text yields a string.
            r @ (Rule::BinRightBrace | Rule::TextRightBrace) => {
                self.span2.merge(lexer.span_ref());
                if r == Rule::BinRightBrace {
                    let bytes = self.take_bytes2(lexer);
                    yield_term(lexer, TokenID::Bin, arena.bin(bytes), self.span2);
                } else {
                    let s = self.take_str2(lexer)?;
                    yield_term(lexer, TokenID::Str, arena.str(s), self.span2);
                }
                lexer.begin(Mode::Expr);
            }
            // Labeled chunk start: record the terminator sequence
            // (delimiter byte(s) followed by the label text) in bin_label.
            r @ (Rule::BinLabelStart | Rule::TextLabelStart) => {
                self.span2.merge(lexer.span_ref());

                self.bin_label.clear();
                let len = lexer.buffer.len();
                if lexer.buffer[len - 1] == b'\n' {
                    self.bin_label.push(b'\n');
                    lexer.buffer.pop();
                    let len = lexer.buffer.len();
                    if lexer.buffer[len - 1] == b'\r' {
                        // Keep CRLF order: '\r' precedes '\n' in the terminator.
                        self.bin_label.insert(0, b'\r');
                        lexer.buffer.pop();
                    }
                } else {
                    let len = lexer.buffer.len();
                    let b = lexer.buffer[len - 1];
                    self.bin_label.push(b);
                    lexer.buffer.pop();
                }

                // Remaining buffer is the label text itself.
                let buf = std::mem::take(&mut lexer.buffer);
                self.bin_label.extend(buf);

                if r == Rule::BinLabelStart {
                    lexer.begin(Mode::BinLabel);
                } else {
                    lexer.begin(Mode::TextLabel);
                }
            }
            // Candidate terminator inside a labeled chunk: if it matches the
            // recorded label, the chunk ends; otherwise it is literal content.
            r @ (Rule::BinLabelEnd | Rule::TextLabelEnd) => {
                self.span2.merge(lexer.span_ref());

                // NOTE(review): empty branch — appears to be a leftover no-op.
                if lexer.buffer[0] != b':' {
                }
                if lexer.buffer == self.bin_label {
                    if r == Rule::BinLabelEnd {
                        lexer.begin(Mode::Bin);
                    } else {
                        lexer.begin(Mode::Text);
                    }
                } else {
                    if r == Rule::TextLabelEnd && lexer.buffer[0] == b'\r' {
                        lexer.buffer.remove(0);
                    }
                    self.buffer2.extend(&lexer.buffer);
                }
            }
            r @ (Rule::BinLabelNLChar | Rule::TextLabelNewLine) => {
                self.span2.merge(lexer.span_ref());

                // Text chunks normalize CRLF to LF; bin keeps bytes verbatim.
                if r == Rule::TextLabelNewLine && lexer.buffer[0] == b'\r' {
                    lexer.buffer.remove(0);
                }
                self.buffer2.extend(&lexer.buffer);
            }
            Rule::BinLabelAnyChar | Rule::TextLabelAnyChar => {
                self.span2.merge(lexer.span_ref());
                self.buffer2.extend(&lexer.buffer);
            }
            // `{` in Expr mode begins a script literal accumulated verbatim.
            Rule::LeftBrace => {
                lexer.begin(Mode::Script);
                lexer.clear();
                lexer.accum();
                self.span2 = lexer.span();
            }
            Rule::ScriptNotBraces => {
                self.span2.merge(lexer.span_ref());
            }
            Rule::ScriptLeftBrace => {
                self.span2.merge(lexer.span_ref());
                self.script_curly_nest_count += 1;
            }
            Rule::ScriptRightBrace => {
                self.span2.merge(lexer.span_ref());
                if self.script_curly_nest_count != 0 {
                    self.script_curly_nest_count -= 1;
                } else {
                    // Outermost `}`: drop it and yield the script as a Str.
                    lexer.buffer.pop();
                    let s = take_str(lexer)?;
                    yield_term(lexer, TokenID::Str, arena.str(s), self.span2);
                    lexer.begin(Mode::Expr);
                }
            }
            Rule::ScriptNewLine => {
                self.span2.merge(lexer.span_ref());
            }
            // Integer literals in various notations.
            Rule::HexConst => {
                // Strip the 2-byte `0x`-style prefix.
                lexer.buffer.drain(0..2);
                let s = take_str(lexer)?;
                let val = parse_i64(s.as_str(), 16)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            // `<base>'<digits>` notation, e.g. `2'1010`, `36'AZ`.
            Rule::BaseConst => {
                let s = take_str(lexer)?;
                let (base_str, digits) = s.split_once('\'').ok_or(ParlexError {
                    message: format!("missing `'` separator"),
                    span: Some(lexer.span()),
                })?;
                let base: u32 = base_str
                    .parse()
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                let val = parse_i64(digits, base)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            // Character-code literals; each yields the code point as Int.
            Rule::CharHex => {
                let mut s = take_str(lexer)?;
                // Strip the 4-byte lexeme prefix before the hex digits.
                s.drain(0..4);
                let val = parse_i64(s.as_str(), 16)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::CharOct => {
                let mut s = take_str(lexer)?;
                s.drain(0..3);
                let val = parse_i64(s.as_str(), 8)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::CharNewLine1 | Rule::CharNewLine2 | Rule::CharNewLine4 => {
                yield_term(lexer, TokenID::Int, arena.int('\n' as i64), lexer.span());
            }
            Rule::CharNotBackslash => {
                let mut s = take_str(lexer)?;
                s.drain(0..2);
                let val = s.chars().next().ok_or(ParlexError {
                    message: format!("invalid char"),
                    span: Some(lexer.span()),
                })? as i64;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::CharCtrl => {
                let mut s = take_str(lexer)?;
                s.drain(0..4);
                // Control chars map `@`..`_` to 0..31 (caret notation).
                let val = s.chars().next().ok_or(ParlexError {
                    message: format!("invalid char"),
                    span: Some(lexer.span()),
                })? as i64
                    - '@' as i64;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::CharDel1 | Rule::CharDel2 => {
                yield_term(lexer, TokenID::Int, arena.int('\x7F' as i64), lexer.span());
            }
            Rule::CharEsc => {
                yield_term(lexer, TokenID::Int, arena.int('\x1B' as i64), lexer.span());
            }
            Rule::CharBell => {
                yield_term(
                    lexer,
                    TokenID::Int,
                    arena.int('\u{0007}' as i64),
                    lexer.span(),
                );
            }
            Rule::CharBackspace => {
                yield_term(
                    lexer,
                    TokenID::Int,
                    arena.int('\u{0008}' as i64),
                    lexer.span(),
                );
            }
            Rule::CharFormFeed => {
                yield_term(
                    lexer,
                    TokenID::Int,
                    arena.int('\u{000C}' as i64),
                    lexer.span(),
                );
            }
            Rule::CharNewLine3 => {
                yield_term(lexer, TokenID::Int, arena.int('\n' as i64), lexer.span());
            }
            Rule::CharCarriageReturn => {
                yield_term(lexer, TokenID::Int, arena.int('\r' as i64), lexer.span());
            }
            Rule::CharTab => {
                yield_term(lexer, TokenID::Int, arena.int('\t' as i64), lexer.span());
            }
            Rule::CharVerticalTab => {
                yield_term(
                    lexer,
                    TokenID::Int,
                    arena.int('\u{000B}' as i64),
                    lexer.span(),
                );
            }
            Rule::CharAny => {
                let mut s = take_str(lexer)?;
                s.drain(0..3);
                let val = s.chars().next().ok_or(ParlexError {
                    message: format!("invalid char"),
                    span: Some(lexer.span()),
                })? as i64;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::OctConst => {
                let s = take_str(lexer)?;
                let val = parse_i64(s.as_str(), 8)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::DecConst => {
                let s = take_str(lexer)?;
                let val = parse_i64(s.as_str(), 10)
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Int, arena.int(val), lexer.span());
            }
            Rule::FPConst => {
                let s = take_str(lexer)?;
                let val: f64 = s
                    .parse()
                    .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                yield_term(lexer, TokenID::Real, arena.real(val), lexer.span());
            }
            // Quoted literals: accumulate the body until the closing quote.
            Rule::DoubleQuote => {
                lexer.begin(Mode::Str);
                lexer.clear();
                lexer.accum();
            }
            Rule::SingleQuote => {
                lexer.begin(Mode::Atom);
                lexer.clear();
                lexer.accum();
            }
            // In-string escapes rewrite the tail of lexer.buffer in place,
            // replacing the escape sequence with the byte it denotes.
            Rule::StrAtomCharHex => {
                // Last 4 bytes are `\xHH`; decode HH and replace all 4.
                let len = lexer.buffer.len();
                let b: u8 = parse_i64(
                    str::from_utf8(&lexer.buffer[len - 2..])
                        .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?,
                    16,
                )
                .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?
                .try_into()
                .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                lexer.buffer.truncate(len - 4);
                lexer.buffer.push(b);
            }
            Rule::StrAtomCharOct => {
                // Octal escape: digits run from the last `\` to the end.
                let slash_pos = lexer.buffer.iter().rposition(|&b| b == b'\\').unwrap();
                let b: u8 = parse_i64(
                    str::from_utf8(&lexer.buffer[slash_pos + 1..])
                        .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?,
                    8,
                )
                .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?
                .try_into()
                .map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?;
                lexer.buffer.truncate(slash_pos);
                lexer.buffer.push(b);
            }
            Rule::StrAtomCharCtrl => {
                let len = lexer.buffer.len();
                let b = lexer.buffer[len - 1] - b'@';
                lexer.buffer.truncate(len - 3);
                lexer.buffer.push(b);
            }
            Rule::StrAtomCharDel1 => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x7F');
            }
            Rule::StrAtomCharDel2 => {
                let idx = lexer.buffer.len() - 3;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x7F');
            }
            Rule::StrAtomCharEsc => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x1B');
            }
            Rule::StrAtomCharBell => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x07');
            }
            Rule::StrAtomCharBackspace => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x08');
            }
            Rule::StrAtomCharFormFeed => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x0C');
            }
            Rule::StrAtomCharNewLine => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\n');
            }
            Rule::StrAtomCharCarriageReturn => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\r');
            }
            Rule::StrAtomCharTab => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\t');
            }
            Rule::StrAtomVerticalTab => {
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.truncate(idx);
                lexer.buffer.push(b'\x0B');
            }
            // Backslash-newline: line continuation — remove `\`, optional
            // `\r`, and the `\n` from the accumulated buffer.
            Rule::StrAtomCharSkipNewLine => {
                lexer.buffer.pop();
                let idx = lexer.buffer.len() - 1;
                if lexer.buffer[idx] == b'\r' {
                    lexer.buffer.pop();
                }
                lexer.buffer.pop();
            }
            Rule::StrAtomCharAny | Rule::StrAtomCharBackslash => {
                // Drop the escaping backslash, keep the escaped byte.
                let idx = lexer.buffer.len() - 2;
                lexer.buffer.remove(idx);
            }
            Rule::StrChar | Rule::AtomChar | Rule::StrAtomCarriageReturn => {}
            Rule::StrDoubleQuote => {
                lexer.begin(Mode::Expr);
                // Drop the closing quote before yielding the string body.
                lexer.buffer.pop();
                let s = take_str(lexer)?;
                yield_term(lexer, TokenID::Str, arena.str(s), lexer.span());
            }
            Rule::AtomSingleQuote => {
                lexer.begin(Mode::Expr);
                lexer.buffer.pop();
                let s = take_str(lexer)?;
                yield_term(lexer, TokenID::Atom, arena.atom(s), lexer.span());
            }
            // `'...'(`: quoted atom used as a function name.
            Rule::AtomLeftParen => {
                lexer.begin(Mode::Expr);
                self.nest_count += 1;
                let mut s = take_str(lexer)?;
                // Strip the trailing `'(`.
                s.truncate(s.len() - 2);
                yield_term(lexer, TokenID::Func, arena.atom(s), lexer.span());
            }
            Rule::AtomLeftBrace => {}
            // `{` inside a string: open an interpolation group. Yield the
            // string so far, a `++` operator, and `(`; the matching `}`
            // (Rule::RightBrace) closes the group and resumes the string.
            Rule::StrLeftBrace => {
                lexer.begin(Mode::Expr);
                self.nest_count += 1;
                self.curly_nest_count += 1;
                let mut s = take_str(lexer)?;
                s.pop();
                yield_term(lexer, TokenID::Str, arena.str(s), lexer.span());
                let op_tab_idx = arena.lookup_oper("++");
                yield_optab(
                    lexer,
                    TokenID::AtomOper,
                    arena.atom("++"),
                    op_tab_idx,
                    lexer.span(),
                );
                yield_id(lexer, TokenID::LeftParen);
            }
            Rule::StrAtomNewLine => {
            }
            Rule::Error => {
                let s = take_str(lexer)?;
                return Err(ParlexError {
                    message: format!("error on lexeme {:?}", s),
                    span: Some(lexer.span()),
                });
            }
            // End of input is only legal at top level (Expr mode).
            Rule::End => {
                if lexer.mode() == Mode::Expr {
                    yield_id(lexer, TokenID::End);
                } else {
                    return Err(ParlexError {
                        message: format!("unexpected end of stream"),
                        span: Some(lexer.span()),
                    });
                }
            }
        }

        log::trace!(
            "ACTION end: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
            lexer.mode(),
            rule,
            str::from_utf8(&lexer.buffer),
            str::from_utf8(&self.buffer2),
            str::from_utf8(&self.bin_label),
            lexer.accum_flag,
        );

        Ok(())
    }
}
1062
/// A lexer over a byte input stream that produces `TermToken`s, allocating
/// terms in a shared `Arena`.
pub struct TermLexer<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    // Underlying parlex lexer paired with the term driver.
    pub(crate) lexer: Lexer<I, TermLexerDriver<I>, Arena>,
}
1120
1121impl<I> TermLexer<I>
1122where
1123 I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
1124{
1125 pub fn try_new(input: I) -> Result<Self, ParlexError> {
1159 let driver = TermLexerDriver {
1160 _marker: PhantomData,
1161 nest_count: 0,
1162 comment_nest_count: 0,
1163 curly_nest_count: 0,
1164 script_curly_nest_count: 0,
1165 bin_count: 0,
1166 bin_label: Vec::new(),
1167 date_format: String::new(),
1168 buffer2: Vec::new(),
1169 span2: Span::default(),
1170 };
1171 let lexer = Lexer::try_new(input, driver)?;
1172 Ok(Self { lexer })
1173 }
1174}
1175
/// Streaming interface: pulls the next token, delegating to the inner lexer.
impl<I> TryNextWithContext<Arena, LexerStats> for TermLexer<I>
where
    I: TryNextWithContext<Arena, Item = u8, Error: std::fmt::Display + 'static>,
{
    type Item = TermToken;

    type Error = ParlexError;

    /// Returns the next token, or `Ok(None)` when the input is exhausted.
    /// `context` is the arena that new terms are allocated into.
    fn try_next_with_context(
        &mut self,
        context: &mut Arena,
    ) -> Result<Option<TermToken>, ParlexError> {
        self.lexer.try_next_with_context(context)
    }

    /// Reports the inner lexer's statistics.
    fn stats(&self) -> LexerStats {
        self.lexer.stats()
    }
}
1219
#[cfg(test)]
mod tests {
    use arena_terms::View;
    use parlex::Token;
    use try_next::IterInput;

    use super::*;

    // Lexes `s` to completion, panicking on any lexer error.
    fn lex(arena: &mut Arena, s: &str) -> Vec<TermToken> {
        let input = IterInput::from(s.bytes());
        let mut lexer = TermLexer::try_new(input).expect("cannot create lexer");
        lexer.try_collect_with_context(arena).expect("lexer error")
    }

    // Every `date{...}` spelling must resolve to one of four known epoch
    // values, indexed by the `u8` tag (0 = midnight .. 3 = with millis).
    #[test]
    fn test_dates() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut arena = Arena::new();
        const DATES: &[(&str, u8)] = &[
            ("date{-5381856000000}", 0),
            ("date{-5381830320000}", 1),
            ("date{-5381830311000}", 2),
            ("date{-5381830310999}", 3),
            ("date{1799-06-16}", 0),
            ("date{1799-06-16Z}", 0),
            ("date{1799-06-16 Z}", 0),
            ("date{1799-06-16-00:00}", 0),
            ("date{1799-06-16 -00:00}", 0),
            ("date{1799-06-16T07:08}", 1),
            ("date{1799-06-16T07:08:09}", 2),
            ("date{1799-06-16T07:08:09Z}", 2),
            ("date{1799-06-16T07:08:09.001Z}", 3),
            ("date{1799-06-16T07:08:09 Z}", 2),
            ("date{1799-06-16T07:08:09.001 Z}", 3),
            ("date{1799-06-16T07:08:09+00:00}", 2),
            ("date{1799-06-16T07:08:09.001+00:00}", 3),
            ("date{1799-06-16T07:08:09 +00:00}", 2),
            ("date{1799-06-16T07:08:09.001 +00:00}", 3),
            ("date{1799-06-16T07:08:09Z}", 2),
            ("date{1799-06-16T07:08:09.001Z}", 3),
            ("date{1799-06-16 07:08:09 Z}", 2),
            ("date{1799-06-16T07:08:09.001 Z}", 3),
            ("date{1799-06-16 07:08:09+00:00}", 2),
            ("date{1799-06-16T07:08:09.001+00:00}", 3),
            ("date{1799-06-16 07:08:09 +00:00}", 2),
            ("date{1799-06-16 07:08:09.001 +00:00}", 3),
            ("date{1799-06-16T07:08Z}", 1),
            ("date{1799-06-16T07:08 Z }", 1),
            ("date{ 1799-06-16T07:08+00:00}", 1),
            ("date{ 1799-06-16T07:08 +00:00 }", 1),
            ("date{06/16/1799Z}", 0),
            ("date{06/16/1799 Z}", 0),
            ("date{06/16/1799+00:00}", 0),
            ("date{06/16/1799 +00:00}", 0),
            ("date{06/16/1799 07:08Z}", 1),
            ("date{06/16/1799 07:08:09Z}", 2),
            ("date{06/16/1799 07:08:09.001Z}", 3),
            ("date{06/16/1799 07:08 Z}", 1),
            ("date{06/16/1799 07:08:09 Z}", 2),
            ("date{06/16/1799 07:08:09.001 Z}", 3),
            ("date{06/16/1799 07:08+00:00}", 1),
            ("date{06/16/1799 07:08:09+00:00}", 2),
            ("date{06/16/1799 07:08:09.001+00:00}", 3),
            ("date{06/16/1799 07:08 +00:00}", 1),
            ("date{06/16/1799 07:08:09 +00:00}", 2),
            ("date{06/16/1799 07:08:09.001 +00:00}", 3),
            ("date{16-Jun-1799Z}", 0),
            ("date{16-jun-1799 Z}", 0),
            ("date{16-JUN-1799+00:00}", 0),
            ("date{16-Jun-1799 +00:00}", 0),
            ("date{16-Jun-1799 07:08Z}", 1),
            ("date{16-JUN-1799 07:08:09Z}", 2),
            ("date{16-Jun-1799 07:08:09.001Z}", 3),
            ("date{16-Jun-1799 07:08 Z}", 1),
            ("date{16-jun-1799 07:08:09 Z}", 2),
            ("date{16-Jun-1799 07:08:09.001 Z}", 3),
            ("date{16-Jun-1799 07:08+00:00}", 1),
            ("date{16-Jun-1799 07:08:09+00:00}", 2),
            ("date{16-Jun-1799 07:08:09.001+00:00}", 3),
            ("date{16-Jun-1799 07:08 +00:00}", 1),
            ("date{16-Jun-1799 07:08:09 +00:00}", 2),
            ("date{16-Jun-1799 07:08:09.001 +00:00}", 3),
        ];
        for (s, k) in DATES {
            let mut ts = lex(&mut arena, s);
            let tok = ts.remove(0);
            assert_eq!(tok.token_id, TokenID::Date);
            let term = Term::try_from(tok.value).unwrap();
            let d = term.unpack_date(&arena).unwrap();
            assert_eq!(
                d,
                match k {
                    0 => -5381856000000,
                    1 => -5381830320000,
                    2 => -5381830311000,
                    3 => -5381830310999,
                    _ => unreachable!(),
                }
            );
        }
    }

    // Bare and quoted atoms all lex as Atom terms on the same line.
    #[test]
    fn test_atoms() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "\na+foo-x '^&%^&%^&%''abc' 'AAA'");
        dbg!(&ts);
        assert!(ts.len() == 9);
        assert!(ts.iter().take(ts.len() - 1).all(|t| {
            t.span().unwrap().start.line == 1
                && matches!(
                    Term::try_from(t.value.clone())
                        .unwrap()
                        .view(&arena)
                        .unwrap(),
                    View::Atom(_)
                )
        }));
    }

    // bin{...} counted and labeled chunks, plus hex{...}, yield Bin terms.
    #[test]
    fn test_bin() {
        let mut arena = Arena::new();
        let ts = lex(
            &mut arena,
            "% single line comment\nbin{3:\x00\x01\x02 eob:\x00\x01:aaa\x02:eob eob\n\x00\neob eob\r\n\x00\r\neob\r\n}\r\nhex{ 0203 0405 FE }",
        );
        dbg!(&ts);
        assert!(ts.len() == 3);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Bin(_)
        ));
        match Term::try_from(ts[0].value.clone())
            .unwrap()
            .view(&arena)
            .unwrap()
        {
            View::Bin(bytes) => assert!(bytes == &[0, 1, 2, 0, 1, 58, 97, 97, 97, 2, 0, 0,]),
            _ => unreachable!(),
        }
    }

    // text{...} chunks concatenate into a single Str term.
    #[test]
    fn test_text() {
        let mut arena = Arena::new();
        let ts = lex(
            &mut arena,
            "/* single /* line */ comment */\ntext{3:abc eob:de:aaa:eob eob\n0\neob eob\r\n1\r\neob\r\n}\r\n",
        );
        dbg!(&ts);
        assert!(ts.len() == 2);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Str(_)
        ));
        match Term::try_from(ts[0].value.clone())
            .unwrap()
            .view(&arena)
            .unwrap()
        {
            View::Str(s) => assert!(s == "abcde:aaa01"),
            _ => unreachable!(),
        }
    }

    // Several spellings of the string "hello" all lex to equal Str terms.
    #[test]
    fn test_texts() {
        let mut arena = Arena::new();
        let ts = lex(
            &mut arena,
            "/* single [ ( { /* line */ comment */\n\"hello\" {hello} text{5:hello} text{e:hello:e} text{e:h:e e:e:e 2:ll e:o:e} text{\ne\nhello\ne}",
        );
        dbg!(&ts);
        assert!(ts.len() == 7);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Str(_)
        ));
        assert!(ts.iter().take(ts.len() - 1).all(|t| {
            match Term::try_from(t.value.clone())
                .unwrap()
                .view(&arena)
                .unwrap()
            {
                View::Str(s) => s == "hello",
                _ => false,
            }
        }));
    }

    // `<base>'<digits>` integer notation.
    #[test]
    fn test_integers() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "[2'01010001111, 10'123, 36'AZ]");
        assert!(ts.len() == 8);
        assert!(matches!(ts[1].token_id, TokenID::Int));
    }

    // String interpolation splits into Str / `++` / parenthesized expr tokens.
    #[test]
    fn lex_string_subs() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let ts = lex(arena, "\"aaa{1 + 2}bbb{3 * 4}ccc\"");
        assert_eq!(ts.len(), 18);
        let t0: Term = ts[0].value.clone().try_into().unwrap();
        let t1: Term = ts[8].value.clone().try_into().unwrap();
        let t2: Term = ts[16].value.clone().try_into().unwrap();
        assert_eq!(t0.unpack_str(arena).unwrap(), "aaa");
        assert_eq!(t1.unpack_str(arena).unwrap(), "bbb");
        assert_eq!(t2.unpack_str(arena).unwrap(), "ccc");
    }
}