1use crate::oper::{Assoc, Fixity, OperArg, OperDef, OperDefs};
15use crate::parser::TokenID;
16use anyhow::{Error, Result, anyhow, bail};
17use arena_terms::{Arena, Term, View};
18use chrono::{DateTime, FixedOffset, Utc};
19use smartstring::alias::String;
20use std::io::{self, BufReader, Read};
21use std::iter::FusedIterator;
22use std::mem;
23
24include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));
25
/// Payload carried by a lexed token.
///
/// All variants are `Copy`; `Term` is an arena handle, so copying is cheap.
#[derive(Debug, Clone, Copy, Default)]
pub enum Value {
    /// No payload (the `Default`).
    #[default]
    None,
    /// A term allocated in the `Arena` (atom, int, str, date, ...).
    Term(Term),
    /// A plain numeric payload.
    Index(usize),
}
33
/// Generates a `TryFrom<Value>` impl per `Variant => Type` pair that extracts
/// the variant's payload and fails (naming the expected variant) otherwise.
macro_rules! impl_tryfrom_value {
    ( $( $Variant:ident => $ty:ty ),+ $(,)? ) => {
        $(
            impl ::core::convert::TryFrom<Value> for $ty {
                type Error = ::anyhow::Error;
                fn try_from(v: Value) -> ::anyhow::Result<Self> {
                    match v {
                        Value::$Variant(x) => Ok(x),
                        _ => ::anyhow::bail!(
                            "invalid value: expected {}",
                            stringify!($Variant),
                        ),
                    }
                }
            }
        )+
    };
}

// `Term::try_from(value)` and `usize::try_from(value)` unwrap the
// corresponding `Value` variants.
impl_tryfrom_value! {
    Term => Term,
    Index => usize,
}
57
58impl TryFrom<Value> for Option<Term> {
59 type Error = Error;
60 fn try_from(v: Value) -> Result<Self> {
61 match v {
62 Value::None => Ok(None),
63 Value::Term(x) => Ok(Some(x)),
64 _ => ::anyhow::bail!("invalid value: expected Term or None"),
65 }
66 }
67}
68
/// A single lexed token: its id, optional payload, and source line.
#[derive(Debug, Clone)]
pub struct TermToken {
    pub token_id: TokenID,
    pub value: Value,
    pub line_no: usize,
    // Operator-table index attached via `yield_optab` when the token was
    // resolved against the operator definitions; `None` otherwise.
    pub op_tab_index: Option<usize>,
}
76
77impl TermToken {
78 #[must_use]
79 pub fn new(token_id: TokenID, value: Value, line_no: usize) -> Self {
80 Self {
81 token_id,
82 value,
83 line_no,
84 op_tab_index: None,
85 }
86 }
87}
88
// `Token` trait plumbing: expose the token id and the source line number.
impl Token for TermToken {
    type TokenID = TokenID;

    fn token_id(&self) -> Self::TokenID {
        self.token_id
    }
    fn line_no(&self) -> usize {
        self.line_no
    }
}
99
100fn parse_date_to_epoch(s: &str, fmt: Option<&str>) -> Result<i64> {
101 let dt_fixed: DateTime<FixedOffset> = match fmt {
102 None => DateTime::parse_from_rfc3339(s)?,
103 Some(layout) => DateTime::parse_from_str(s, layout)?,
104 };
105 let dt_utc = dt_fixed.with_timezone(&Utc);
106 Ok(dt_utc.timestamp_millis())
107}
108
109fn parse_i64(s: &str, base: u32) -> Result<i64> {
110 if s.is_empty() {
111 return Ok(0);
112 }
113 match i64::from_str_radix(s, base) {
114 Ok(n) => Ok(n.try_into()?),
115 Err(e) if e.kind() == &std::num::IntErrorKind::InvalidDigit => {
116 bail!("digit not valid for base")
117 }
118 Err(_) => bail!("number overflowed u64"),
119 }
120}
121
/// Lexer that turns a fused byte stream into `TermToken`s.
pub struct TermLexer<I>
where
    I: FusedIterator<Item = u8>,
{
    // Shared lexer machinery: current mode, scratch buffers, line number.
    ctx: LexerCtx<I, <Self as Lexer<Arena>>::LexerData, <Self as Lexer<Arena>>::Token>,
    // Operator definitions consulted when classifying atoms and functions.
    pub opers: OperDefs,
    // Open `(`/`[` (and string-substitution `{`) grouping depth.
    nest_count: isize,
    // Nesting depth of `/* ... */` block comments.
    comment_nest_count: isize,
    // Open `{...}` substitutions inside double-quoted strings.
    curly_nest_count: isize,
    // Brace balance inside a `{...}` script literal.
    script_curly_nest_count: isize,
    // Remaining bytes/chars to consume in a counted bin/text chunk.
    bin_count: isize,
    // Terminator label for labeled bin/text chunks.
    bin_label: Vec<u8>,
    // chrono format string accumulated while lexing a date literal.
    date_format: String,
}
136
137impl<I> TermLexer<I>
138where
139 I: FusedIterator<Item = u8>,
140{
141 pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
142 Ok(Self {
143 ctx: LexerCtx::try_new(input)?,
144 opers: match opers {
145 Some(opers) => opers,
146 None => OperDefs::new(),
147 },
148 nest_count: 0,
149 comment_nest_count: 0,
150 curly_nest_count: 0,
151 script_curly_nest_count: 0,
152 bin_count: 0,
153 bin_label: Vec::new(),
154 date_format: String::new(),
155 })
156 }
157
158 fn yield_id(&mut self, token_id: TokenID) {
159 self.yield_token(TermToken {
161 token_id,
162 value: Value::None,
163 line_no: self.ctx().line_no,
164 op_tab_index: None,
165 });
166 }
167
168 fn yield_term(&mut self, token_id: TokenID, term: Term) {
169 self.yield_token(TermToken {
170 token_id,
171 value: Value::Term(term),
172 line_no: self.ctx().line_no,
173 op_tab_index: None,
174 });
175 }
176
177 fn yield_index(&mut self, token_id: TokenID, index: usize) {
178 self.yield_token(TermToken {
179 token_id,
180 value: Value::Index(index),
181 line_no: self.ctx().line_no,
182 op_tab_index: None,
183 });
184 }
185
186 fn yield_optab(&mut self, token_id: TokenID, term: Term, op_tab_index: Option<usize>) {
187 self.yield_token(TermToken {
188 token_id,
189 value: Value::Term(term),
190 line_no: self.ctx().line_no,
191 op_tab_index,
192 });
193 }
194}
195
impl<I> Lexer<Arena> for TermLexer<I>
where
    I: FusedIterator<Item = u8>,
{
    type Input = I;
    type LexerData = LexData;
    type Token = TermToken;

    // Accessors required by the `Lexer` trait.
    fn ctx(&self) -> &LexerCtx<Self::Input, Self::LexerData, Self::Token> {
        &self.ctx
    }

    fn ctx_mut(&mut self) -> &mut LexerCtx<Self::Input, Self::LexerData, Self::Token> {
        &mut self.ctx
    }

    /// Semantic action run once per matched lexer rule.
    ///
    /// Updates lexer state (mode switches, nesting counters, scratch buffers)
    /// and yields tokens, allocating any produced terms in `arena`.
    ///
    /// # Errors
    /// Fails on malformed numbers/dates or invalid UTF-8 in the buffers.
    fn action(
        &mut self,
        arena: &mut Arena,
        rule: <Self::LexerData as LexerData>::LexerRule,
    ) -> Result<()> {
        log::trace!(
            "ACTION begin: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
            self.ctx().mode,
            rule,
            str::from_utf8(&self.ctx().buffer),
            str::from_utf8(&self.ctx().buffer2),
            str::from_utf8(&self.bin_label),
            self.ctx().accum_flag,
        );
        match rule {
            Rule::Empty => {
                unreachable!()
            }
            // --- Comments: block comments nest via `comment_nest_count`. ---
            Rule::LineComment => {}
            Rule::CommentStart => {
                if self.comment_nest_count == 0 {
                    self.begin(Mode::Comment);
                }
                self.comment_nest_count += 1;
            }
            Rule::CommentEnd => {
                self.comment_nest_count -= 1;
                if self.comment_nest_count == 0 {
                    self.begin(Mode::Expr);
                }
            }
            Rule::CommentChar | Rule::ExprSpace | Rule::CommentAnyChar => {}
            Rule::ExprNewLine | Rule::CommentNewLine => {
                self.ctx_mut().line_no += 1;
            }
            // --- Grouping; `nest_count` tracks open parens/brackets. ---
            Rule::LeftParen => {
                self.nest_count += 1;
                self.yield_id(TokenID::LeftParen);
            }
            Rule::RightParen => {
                self.nest_count -= 1;
                self.yield_id(TokenID::RightParen);
            }
            Rule::LeftBrack => {
                self.nest_count += 1;
                self.yield_id(TokenID::LeftBrack);
            }
            Rule::RightBrack => {
                self.nest_count -= 1;
                self.yield_id(TokenID::RightBrack);
            }
            Rule::Comma => {
                self.yield_id(TokenID::Comma);
            }
            Rule::Pipe => {
                self.yield_id(TokenID::Pipe);
            }
            // `}` in Expr mode: if it closes a `{...}` substitution opened
            // inside a string (see Rule::StrLeftBrace), resume Str mode and
            // splice with `++`; otherwise it is a stray brace -> error token.
            Rule::RightBrace => {
                self.nest_count -= 1;
                self.curly_nest_count -= 1;
                if self.curly_nest_count >= 0 {
                    self.begin(Mode::Str);
                    self.yield_id(TokenID::RightParen);
                    let op_tab_idx = self.opers.lookup("++");
                    self.yield_optab(TokenID::AtomOper, arena.atom("++"), op_tab_idx);
                    self.clear();
                    self.accum();
                } else {
                    self.yield_term(TokenID::Error, arena.str("}"));
                }
            }
            // Identifier directly followed by `(`; buffer holds "name(" and
            // the trailing paren is popped before the lookup.
            Rule::Func => {
                self.nest_count += 1;
                self.ctx_mut().buffer.pop();
                let s = self.take_str()?;
                let op_tab_idx = self.opers.lookup(&s);
                let op_tab = self.opers.get(op_tab_idx);

                let atom = arena.atom(s);

                if op_tab.is_oper() {
                    // For each defined fixity, classify its op def as "empty"
                    // (no args beyond the fixity's required arity) or not.
                    let (has_empty, has_non_empty) =
                        [Fixity::Prefix, Fixity::Infix, Fixity::Postfix]
                            .iter()
                            .filter_map(|f| {
                                op_tab
                                    .get_op_def(*f)
                                    .map(|x| x.args.len() <= OperDef::required_arity(*f))
                            })
                            .fold((false, false), |(e, ne), is_empty| {
                                if is_empty { (true, ne) } else { (e, true) }
                            });

                    match (has_empty, has_non_empty) {
                        (false, false) => unreachable!(),
                        (true, false) => {
                            self.yield_optab(TokenID::AtomOper, atom, op_tab_idx);
                            self.yield_id(TokenID::LeftParen);
                        }
                        (false, true) => {
                            self.yield_optab(TokenID::FuncOper, atom, op_tab_idx);
                        }
                        // Mixed empty/non-empty defs are ambiguous here.
                        (true, true) => bail!("arguments conflict in op defs for {:?}", atom),
                    }
                } else {
                    self.yield_optab(TokenID::Func, atom, op_tab_idx);
                }
            }
            Rule::Var => {
                let s = self.take_str()?;
                self.yield_term(TokenID::Var, arena.var(s));
            }
            // Bare atom; a lone `.` at top level terminates the expression.
            Rule::Atom => {
                if self.ctx().buffer == b"." && self.nest_count == 0 {
                    self.yield_id(TokenID::Dot);
                    self.yield_id(TokenID::End);
                } else {
                    let s = self.take_str()?;
                    let op_tab_idx = self.opers.lookup(&s);
                    let op_tab = self.opers.get(op_tab_idx);
                    let atom = arena.atom(s);
                    if op_tab.is_oper() {
                        self.yield_optab(TokenID::AtomOper, atom, op_tab_idx);
                    } else {
                        self.yield_optab(TokenID::Atom, atom, op_tab_idx);
                    }
                }
            }

            // --- Date literals ---------------------------------------------
            // `date{<millis>}`: strip "date{" prefix and "}" suffix, parse
            // the raw epoch-milliseconds value.
            Rule::DateEpoch => {
                let mut s = self.take_str()?;
                s.pop();
                s.drain(0..5);
                let s = s.trim();
                let d = parse_i64(s, 10)?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            // `date{` with a textual date: accumulate normalized text in
            // buffer2 and a matching chrono format in `date_format`.
            Rule::Date => {
                self.begin(Mode::Date);
                self.clear();
                self.ctx_mut().buffer2.clear();
                self.date_format.clear();
            }
            Rule::Date1 => {
                self.begin(Mode::Time);
                self.date_format.push_str("%Y-%m-%d");
                self.extend_buffer2_with_buffer();
            }
            Rule::Date2 => {
                self.begin(Mode::Time);
                self.date_format.push_str("%m/%d/%Y");
                self.extend_buffer2_with_buffer();
            }
            Rule::Date3 => {
                self.begin(Mode::Time);
                self.date_format.push_str("%d-%b-%Y");
                self.extend_buffer2_with_buffer();
            }
            Rule::Time1 => {
                self.begin(Mode::Zone);
                self.date_format.push_str("T%H:%M:%S%.f");
                self.extend_buffer2_with_buffer();
            }
            // HH:MM only: synthesize ":00" seconds in the normalized text.
            Rule::Time2 => {
                self.begin(Mode::Zone);
                self.date_format.push_str("T%H:%M:%S");
                self.extend_buffer2_with_buffer();
                self.ctx_mut().buffer2.extend(b":00");
            }
            Rule::Time3 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %H:%M:%S%.f");
                self.extend_buffer2_with_buffer();
            }
            Rule::Time4 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %H:%M:%S");
                self.extend_buffer2_with_buffer();
                self.ctx_mut().buffer2.extend(b":00");
            }
            Rule::Time5 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %I:%M:%S%.f %p");
                self.extend_buffer2_with_buffer();
            }
            // 12-hour HH:MM AM/PM: insert ":00" seconds before the trailing
            // 3 bytes (" AM"/" PM") of the buffered text.
            Rule::Time6 => {
                self.begin(Mode::Zone);
                self.date_format.push_str(" %I:%M:%S %p");
                let ctx = &mut self.ctx_mut();
                ctx.buffer2.extend(&ctx.buffer[..ctx.buffer.len() - 3]);
                ctx.buffer2.extend(b":00");
                ctx.buffer2.extend(&ctx.buffer[ctx.buffer.len() - 3..]);
            }
            // `Z` zone: normalize to "+00:00"; if no time was given, also
            // synthesize a midnight time first.
            Rule::Zone1 => {
                if self.ctx().mode == Mode::Time {
                    self.date_format.push_str(" %H:%M:%S");
                    self.ctx_mut().buffer2.extend(b" 00:00:00");
                }
                self.begin(Mode::Expr);
                self.date_format.push_str("%:z");
                self.ctx_mut().buffer2.extend(b"+00:00");
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            // Explicit "+HH:MM"/"-HH:MM" zone; buffer ends with the closing
            // `}` which is popped before appending.
            Rule::Zone2 => {
                if self.ctx().mode == Mode::Time {
                    self.date_format.push_str(" %H:%M:%S");
                    self.ctx_mut().buffer2.extend(b" 00:00:00");
                }
                self.begin(Mode::Expr);
                if self.ctx.buffer[0] == b' ' {
                    self.date_format.push(' ');
                }
                self.date_format.push_str("%:z");
                self.ctx_mut().buffer.pop();
                self.extend_buffer2_with_buffer();
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            // `}` right after the date part: default to midnight UTC.
            Rule::TimeRightBrace => {
                self.begin(Mode::Expr);
                self.date_format.push_str(" %H:%M:%S%:z");
                self.ctx_mut().buffer2.extend(b" 00:00:00+00:00");
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }
            // `}` right after the time part: default to UTC.
            Rule::ZoneRightBrace => {
                self.begin(Mode::Expr);
                self.date_format.push_str("%:z");
                self.ctx_mut().buffer2.extend(b"+00:00");
                let s = self.take_str2()?;
                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
                self.yield_term(TokenID::Date, arena.date(d));
            }

            // --- hex{..}: whitespace-separated hex byte pairs -> Bin term --
            Rule::Hex => {
                self.begin(Mode::Hex);
                self.ctx_mut().buffer2.clear();
            }
            Rule::HexSpace => {}
            Rule::HexNewLine => {
                self.ctx_mut().line_no += 1;
            }
            Rule::HexByte => {
                let s = str::from_utf8(&self.ctx().buffer)?;
                match u8::from_str_radix(s, 16) {
                    Ok(b) => {
                        self.ctx_mut().buffer2.push(b);
                    }
                    Err(_) => {
                        self.yield_term(TokenID::Error, arena.str(s));
                    }
                }
            }
            Rule::HexRightBrace => {
                self.ctx_mut().buffer.pop();
                let bytes = self.take_bytes2();
                self.yield_term(TokenID::Bin, arena.bin(bytes));
                self.begin(Mode::Expr);
            }
            // --- bin{..}/text{..}: counted or labeled raw chunks ----------
            Rule::Bin => {
                self.begin(Mode::Bin);
            }
            Rule::Text => {
                self.begin(Mode::Text);
            }
            Rule::BinSpace | Rule::TextSpace => {}
            Rule::BinNewLine | Rule::TextNewLine => {
                self.ctx_mut().line_no += 1;
            }
            // "N:" prefix: the next N raw bytes/chars belong to the literal.
            r @ (Rule::BinCount | Rule::TextCount) => {
                let s = str::from_utf8(&self.ctx().buffer)?;
                let mut s = String::from(s.trim());
                if &s[s.len() - 1..] == "\n" {
                    self.ctx_mut().line_no += 1;
                }
                if &s[s.len() - 1..] == ":" {
                    s.pop();
                }
                self.bin_count = s.parse()?;
                if self.bin_count > 0 {
                    if r == Rule::BinCount {
                        self.begin(Mode::BinCount);
                    } else {
                        self.begin(Mode::TextCount);
                    }
                    self.clear();
                    self.accum();
                }
            }
            r @ (Rule::BinCountAnyChar | Rule::TextCountAnyChar) => {
                self.bin_count -= 1;
                if self.bin_count == 0 {
                    self.extend_buffer2_with_buffer();
                    self.clear();
                    if r == Rule::BinCountAnyChar {
                        self.begin(Mode::Bin);
                    } else {
                        self.begin(Mode::Text);
                    }
                }
            }
            // Newline inside a counted chunk: CRLF is normalized to LF.
            r @ (Rule::BinCountNLChar | Rule::TextCountNewLine) => {
                self.ctx_mut().line_no += 1;
                if self.ctx_mut().buffer[0] == b'\r' {
                    self.ctx_mut().buffer.remove(0);
                }
                self.bin_count -= 1;
                if self.bin_count == 0 {
                    self.extend_buffer2_with_buffer();
                    self.clear();
                    if r == Rule::BinCountNLChar {
                        self.begin(Mode::Bin);
                    } else {
                        self.begin(Mode::Text);
                    }
                }
            }
            // Closing `}` of bin{}/text{}: emit the accumulated payload.
            r @ (Rule::BinRightBrace | Rule::TextRightBrace) => {
                if r == Rule::BinRightBrace {
                    let bytes = self.take_bytes2();
                    self.yield_term(TokenID::Bin, arena.bin(bytes));
                } else {
                    let s = self.take_str2()?;
                    self.yield_term(TokenID::Str, arena.str(s));
                }
                self.begin(Mode::Expr);
            }
            // "label:" prefix (heredoc-style): remember the label; the chunk
            // runs until the same label is seen again (Rule::*LabelEnd).
            // The byte that terminated the label (newline or `:`) is moved to
            // the front of `bin_label` so end-matching compares like for like.
            r @ (Rule::BinLabelStart | Rule::TextLabelStart) => {
                self.bin_label.clear();
                let len = self.ctx().buffer.len();
                if self.ctx_mut().buffer[len - 1] == b'\n' {
                    self.ctx_mut().line_no += 1;
                    self.bin_label.push(b'\n');
                    self.ctx_mut().buffer.pop();
                    let len = self.ctx().buffer.len();
                    if self.ctx_mut().buffer[len - 1] == b'\r' {
                        self.bin_label.insert(0, b'\r');
                        self.ctx_mut().buffer.pop();
                    }
                } else {
                    let len = self.ctx().buffer.len();
                    let b = self.ctx().buffer[len - 1];
                    self.bin_label.push(b);
                    self.ctx_mut().buffer.pop();
                }

                let buf = mem::take(&mut self.ctx_mut().buffer);
                self.bin_label.extend(buf);

                if r == Rule::BinLabelStart {
                    self.begin(Mode::BinLabel);
                } else {
                    self.begin(Mode::TextLabel);
                }
            }
            // Candidate terminator: only ends the chunk if it matches the
            // remembered label; otherwise it is literal content.
            r @ (Rule::BinLabelEnd | Rule::TextLabelEnd) => {
                if self.ctx_mut().buffer[0] != b':' {
                    self.ctx_mut().line_no += 1;
                }
                if self.ctx().buffer == self.bin_label {
                    if r == Rule::BinLabelEnd {
                        self.begin(Mode::Bin);
                    } else {
                        self.begin(Mode::Text);
                    }
                } else {
                    if r == Rule::TextLabelEnd && self.ctx_mut().buffer[0] == b'\r' {
                        self.ctx_mut().buffer.remove(0);
                    }
                    self.extend_buffer2_with_buffer();
                }
            }
            r @ (Rule::BinLabelNLChar | Rule::TextLabelNewLine) => {
                self.ctx_mut().line_no += 1;
                if r == Rule::TextLabelNewLine && self.ctx_mut().buffer[0] == b'\r' {
                    self.ctx_mut().buffer.remove(0);
                }
                self.extend_buffer2_with_buffer();
            }
            Rule::BinLabelAnyChar | Rule::TextLabelAnyChar => {
                self.extend_buffer2_with_buffer();
            }
            // --- {..} script literal: raw text with balanced inner braces --
            Rule::LeftBrace => {
                self.begin(Mode::Script);
                self.clear();
                self.accum();
            }
            Rule::ScriptNotBraces => {}
            Rule::ScriptLeftBrace => {
                self.script_curly_nest_count += 1;
            }
            Rule::ScriptRightBrace => {
                if self.script_curly_nest_count != 0 {
                    self.script_curly_nest_count -= 1;
                } else {
                    self.ctx_mut().buffer.pop();
                    let s = self.take_str()?;
                    self.yield_term(TokenID::Str, arena.str(s));
                    self.begin(Mode::Expr);
                }
            }
            Rule::ScriptNewLine => {
                self.ctx_mut().line_no += 1;
            }
            // --- Numeric constants -----------------------------------------
            // "0x..." hex integer: strip the two-byte prefix.
            Rule::HexConst => {
                self.ctx_mut().buffer.drain(0..2);
                let s = self.take_str()?;
                let val = parse_i64(s.as_str(), 16)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            // "<base>'<digits>" integer in an explicit base.
            Rule::BaseConst => {
                let s = self.take_str()?;
                let (base_str, digits) =
                    s.split_once('\'').ok_or(anyhow!("missing ' separator"))?;
                let base: u32 = base_str.parse().map_err(|_| anyhow!("invalid base"))?;
                let val = parse_i64(digits, base)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            // --- Character constants (yield the code point as Int) ---------
            Rule::CharHex => {
                let mut s = self.take_str()?;
                s.drain(0..4);
                let val = parse_i64(s.as_str(), 16)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharOct => {
                let mut s = self.take_str()?;
                s.drain(0..3);
                let val = parse_i64(s.as_str(), 8)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharNewLine1 | Rule::CharNewLine2 | Rule::CharNewLine4 => {
                self.ctx_mut().line_no += 1;
                self.yield_term(TokenID::Int, arena.int('\n' as i64));
            }
            Rule::CharNotBackslash => {
                let mut s = self.take_str()?;
                s.drain(0..2);
                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            // Control char: e.g. `^A` -> 1 (letter minus '@').
            Rule::CharCtrl => {
                let mut s = self.take_str()?;
                s.drain(0..4);
                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64 - '@' as i64;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::CharDel1 | Rule::CharDel2 => {
                self.yield_term(TokenID::Int, arena.int('\x7F' as i64));
            }
            Rule::CharEsc => {
                self.yield_term(TokenID::Int, arena.int('\x1B' as i64));
            }
            Rule::CharBell => {
                self.yield_term(TokenID::Int, arena.int('\u{0007}' as i64));
            }
            Rule::CharBackspace => {
                self.yield_term(TokenID::Int, arena.int('\u{0008}' as i64));
            }
            Rule::CharFormFeed => {
                self.yield_term(TokenID::Int, arena.int('\u{000C}' as i64));
            }
            Rule::CharNewLine3 => {
                self.yield_term(TokenID::Int, arena.int('\n' as i64));
            }
            Rule::CharCarriageReturn => {
                self.yield_term(TokenID::Int, arena.int('\r' as i64));
            }
            Rule::CharTab => {
                self.yield_term(TokenID::Int, arena.int('\t' as i64));
            }
            Rule::CharVerticalTab => {
                self.yield_term(TokenID::Int, arena.int('\u{000B}' as i64));
            }
            Rule::CharAny => {
                let mut s = self.take_str()?;
                s.drain(0..3);
                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::OctConst => {
                let s = self.take_str()?;
                let val = parse_i64(s.as_str(), 8)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::DecConst => {
                let s = self.take_str()?;
                let val = parse_i64(s.as_str(), 10)?;
                self.yield_term(TokenID::Int, arena.int(val));
            }
            Rule::FPConst => {
                let s = self.take_str()?;
                let val: f64 = s.parse()?;
                self.yield_term(TokenID::Real, arena.real(val));
            }
            // --- Quoted strings and atoms ----------------------------------
            Rule::DoubleQuote => {
                self.begin(Mode::Str);
                self.clear();
                self.accum();
            }
            Rule::SingleQuote => {
                self.begin(Mode::Atom);
                self.clear();
                self.accum();
            }
            // Escape sequences are rewritten in place at the buffer tail:
            // the escape's source bytes are truncated/removed and replaced
            // by the single decoded byte.
            Rule::StrAtomCharHex => {
                let len = self.ctx().buffer.len();
                let b: u8 = parse_i64(str::from_utf8(&self.ctx_mut().buffer[len - 2..])?, 16)?
                    .try_into()?;
                self.ctx_mut().buffer.truncate(len - 4);
                self.ctx_mut().buffer.push(b);
            }
            Rule::StrAtomCharOct => {
                let slash_pos = self.ctx().buffer.iter().rposition(|&b| b == b'\\').unwrap();
                let b: u8 = parse_i64(str::from_utf8(&self.ctx().buffer[slash_pos + 1..])?, 8)?
                    .try_into()?;
                self.ctx_mut().buffer.truncate(slash_pos);
                self.ctx_mut().buffer.push(b);
            }
            Rule::StrAtomCharCtrl => {
                let len = self.ctx().buffer.len();
                let b = self.ctx_mut().buffer[len - 1] - b'@';
                self.ctx_mut().buffer.truncate(len - 3);
                self.ctx_mut().buffer.push(b);
            }
            Rule::StrAtomCharDel1 => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x7F');
            }
            Rule::StrAtomCharDel2 => {
                let idx = self.ctx().buffer.len() - 3;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x7F');
            }
            Rule::StrAtomCharEsc => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x1B');
            }
            Rule::StrAtomCharBell => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x07');
            }
            Rule::StrAtomCharBackspace => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x08');
            }
            Rule::StrAtomCharFormFeed => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x0C');
            }
            Rule::StrAtomCharNewLine => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\n');
            }
            Rule::StrAtomCharCarriageReturn => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\r');
            }
            Rule::StrAtomCharTab => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\t');
            }
            Rule::StrAtomVerticalTab => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.truncate(idx);
                self.ctx_mut().buffer.push(b'\x0B');
            }
            // Backslash-newline: line continuation, drop it (and any CR).
            Rule::StrAtomCharSkipNewLine => {
                self.ctx_mut().line_no += 1;
                self.ctx_mut().buffer.pop();
                let idx = self.ctx().buffer.len() - 1;
                if self.ctx_mut().buffer[idx] == b'\r' {
                    self.ctx_mut().buffer.pop();
                }
                self.ctx_mut().buffer.pop();
            }
            // Generic `\x`: keep the escaped byte, drop the backslash.
            Rule::StrAtomCharAny | Rule::StrAtomCharBackslash => {
                let idx = self.ctx().buffer.len() - 2;
                self.ctx_mut().buffer.remove(idx);
            }
            Rule::StrChar | Rule::AtomChar | Rule::StrAtomCarriageReturn => {}
            // Closing `"`: pop it and emit the accumulated Str.
            Rule::StrDoubleQuote => {
                self.begin(Mode::Expr);
                self.ctx_mut().buffer.pop();
                let s = self.take_str()?;
                self.yield_term(TokenID::Str, arena.str(s));
            }
            // Closing `'`: pop it and emit the accumulated Atom.
            Rule::AtomSingleQuote => {
                self.begin(Mode::Expr);
                self.ctx_mut().buffer.pop();
                let s = self.take_str()?;
                self.yield_term(TokenID::Atom, arena.atom(s));
            }
            // `'name'(`: quoted atom used as a function; strip `'(` suffix.
            Rule::AtomLeftParen => {
                self.begin(Mode::Expr);
                self.nest_count += 1;
                let mut s = self.take_str()?;
                s.truncate(s.len() - 2);
                self.yield_term(TokenID::Func, arena.atom(s));
            }
            Rule::AtomLeftBrace => {}
            // `{` inside a string: start a substitution. Emit the string so
            // far, then `++ (` so the embedded expression concatenates; the
            // matching `}` is handled by Rule::RightBrace above.
            Rule::StrLeftBrace => {
                self.begin(Mode::Expr);
                self.nest_count += 1;
                self.curly_nest_count += 1;
                let mut s = self.take_str()?;
                s.pop();
                self.yield_term(TokenID::Str, arena.str(s));
                let op_tab_idx = self.opers.lookup("++");
                self.yield_optab(TokenID::AtomOper, arena.atom("++"), op_tab_idx);
                self.yield_id(TokenID::LeftParen);
            }
            Rule::StrAtomNewLine => {
                self.ctx_mut().line_no += 1;
            }
            Rule::Error => {
                let s = self.take_str()?;
                self.yield_term(TokenID::Error, arena.str(s));
            }
            // End of input: only valid at expression level.
            Rule::End => {
                if self.ctx().mode == Mode::Expr {
                    self.yield_id(TokenID::End);
                } else {
                    self.yield_term(TokenID::Error, arena.str("<END>"));
                }
            }
        }

        log::trace!(
            "ACTION end: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
            self.ctx().mode,
            rule,
            str::from_utf8(&self.ctx().buffer),
            str::from_utf8(&self.ctx().buffer2),
            str::from_utf8(&self.bin_label),
            self.ctx().accum_flag,
        );

        Ok(())
    }
}
864
#[cfg(test)]
mod tests {
    use super::*;

    // Lexes `s` to completion with a fresh (or supplied) operator table.
    fn lex(arena: &mut Arena, s: &str) -> Result<Vec<TermToken>> {
        let mut lx = TermLexer::try_new(s.bytes().fuse(), Some(OperDefs::new()))?;
        Ok(lx.try_collect(arena)?)
    }

    // Every supported date literal form; the u8 tag selects the expected
    // epoch-milliseconds value (0..=3 = increasing precision).
    #[test]
    fn test_dates() {
        let _ = env_logger::builder().is_test(true).try_init();
        let mut arena = Arena::new();
        const DATES: &[(&str, u8)] = &[
            ("date{-5381856000000}", 0),
            ("date{-5381830320000}", 1),
            ("date{-5381830311000}", 2),
            ("date{-5381830310999}", 3),
            ("date{1799-06-16}", 0),
            ("date{1799-06-16Z}", 0),
            ("date{1799-06-16 Z}", 0),
            ("date{1799-06-16-00:00}", 0),
            ("date{1799-06-16 -00:00}", 0),
            ("date{1799-06-16T07:08}", 1),
            ("date{1799-06-16T07:08:09}", 2),
            ("date{1799-06-16T07:08:09Z}", 2),
            ("date{1799-06-16T07:08:09.001Z}", 3),
            ("date{1799-06-16T07:08:09 Z}", 2),
            ("date{1799-06-16T07:08:09.001 Z}", 3),
            ("date{1799-06-16T07:08:09+00:00}", 2),
            ("date{1799-06-16T07:08:09.001+00:00}", 3),
            ("date{1799-06-16T07:08:09 +00:00}", 2),
            ("date{1799-06-16T07:08:09.001 +00:00}", 3),
            ("date{1799-06-16T07:08:09Z}", 2),
            ("date{1799-06-16T07:08:09.001Z}", 3),
            ("date{1799-06-16 07:08:09 Z}", 2),
            ("date{1799-06-16T07:08:09.001 Z}", 3),
            ("date{1799-06-16 07:08:09+00:00}", 2),
            ("date{1799-06-16T07:08:09.001+00:00}", 3),
            ("date{1799-06-16 07:08:09 +00:00}", 2),
            ("date{1799-06-16 07:08:09.001 +00:00}", 3),
            ("date{1799-06-16T07:08Z}", 1),
            ("date{1799-06-16T07:08 Z }", 1),
            ("date{ 1799-06-16T07:08+00:00}", 1),
            ("date{ 1799-06-16T07:08 +00:00 }", 1),
            ("date{06/16/1799Z}", 0),
            ("date{06/16/1799 Z}", 0),
            ("date{06/16/1799+00:00}", 0),
            ("date{06/16/1799 +00:00}", 0),
            ("date{06/16/1799 07:08Z}", 1),
            ("date{06/16/1799 07:08:09Z}", 2),
            ("date{06/16/1799 07:08:09.001Z}", 3),
            ("date{06/16/1799 07:08 Z}", 1),
            ("date{06/16/1799 07:08:09 Z}", 2),
            ("date{06/16/1799 07:08:09.001 Z}", 3),
            ("date{06/16/1799 07:08+00:00}", 1),
            ("date{06/16/1799 07:08:09+00:00}", 2),
            ("date{06/16/1799 07:08:09.001+00:00}", 3),
            ("date{06/16/1799 07:08 +00:00}", 1),
            ("date{06/16/1799 07:08:09 +00:00}", 2),
            ("date{06/16/1799 07:08:09.001 +00:00}", 3),
            ("date{16-Jun-1799Z}", 0),
            ("date{16-jun-1799 Z}", 0),
            ("date{16-JUN-1799+00:00}", 0),
            ("date{16-Jun-1799 +00:00}", 0),
            ("date{16-Jun-1799 07:08Z}", 1),
            ("date{16-JUN-1799 07:08:09Z}", 2),
            ("date{16-Jun-1799 07:08:09.001Z}", 3),
            ("date{16-Jun-1799 07:08 Z}", 1),
            ("date{16-jun-1799 07:08:09 Z}", 2),
            ("date{16-Jun-1799 07:08:09.001 Z}", 3),
            ("date{16-Jun-1799 07:08+00:00}", 1),
            ("date{16-Jun-1799 07:08:09+00:00}", 2),
            ("date{16-Jun-1799 07:08:09.001+00:00}", 3),
            ("date{16-Jun-1799 07:08 +00:00}", 1),
            ("date{16-Jun-1799 07:08:09 +00:00}", 2),
            ("date{16-Jun-1799 07:08:09.001 +00:00}", 3),
        ];
        for (s, k) in DATES {
            let mut ts = lex(&mut arena, s).unwrap();
            let tok = ts.remove(0);
            assert_eq!(tok.token_id, TokenID::Date);
            let term = Term::try_from(tok.value).unwrap();
            let d = term.unpack_date(&arena).unwrap();
            assert_eq!(
                d,
                match k {
                    0 => -5381856000000,
                    1 => -5381830320000,
                    2 => -5381830311000,
                    3 => -5381830310999,
                    _ => unreachable!(),
                }
            );
        }
    }

    // Bare and quoted atoms all land on line 2 (after the leading newline).
    #[test]
    fn test_atoms() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "\na+foo-x '^&%^&%^&%''abc' 'AAA'").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 9);
        assert!(ts.iter().take(ts.len() - 1).all(|t| {
            t.line_no == 2
                && matches!(
                    Term::try_from(t.value.clone())
                        .unwrap()
                        .view(&arena)
                        .unwrap(),
                    View::Atom(_)
                )
        }));
    }

    // bin{} with counted chunks, labeled chunks, CRLF handling, plus hex{}.
    #[test]
    fn test_bin() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "% single line comment\nbin{3:\x00\x01\x02 eob:\x00\x01:aaa\x02:eob eob\n\x00\neob eob\r\n\x00\r\neob\r\n}\r\nhex{ 0203 0405 FE }").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 3);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Bin(_)
        ));
        match Term::try_from(ts[0].value.clone())
            .unwrap()
            .view(&arena)
            .unwrap()
        {
            View::Bin(bytes) => assert!(bytes == &[0, 1, 2, 0, 1, 58, 97, 97, 97, 2, 0, 0,]),
            _ => unreachable!(),
        }
    }

    // text{} mirror of test_bin: counted and labeled chunks yield a Str.
    #[test]
    fn test_text() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "/* single /* line */ comment */\ntext{3:abc eob:de:aaa:eob eob\n0\neob eob\r\n1\r\neob\r\n}\r\n").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 2);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Str(_)
        ));
        match Term::try_from(ts[0].value.clone())
            .unwrap()
            .view(&arena)
            .unwrap()
        {
            View::Str(s) => assert!(s == "abcde:aaa01"),
            _ => unreachable!(),
        }
    }

    // Several spellings of the same "hello" string literal.
    #[test]
    fn test_texts() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "/* single [ ( { /* line */ comment */\n\"hello\" {hello} text{5:hello} text{e:hello:e} text{e:h:e e:e:e 2:ll e:o:e} text{\ne\nhello\ne}").unwrap();
        dbg!(&ts);
        assert!(ts.len() == 7);
        assert!(matches!(
            Term::try_from(ts[0].value.clone())
                .unwrap()
                .view(&arena)
                .unwrap(),
            View::Str(_)
        ));
        assert!(ts.iter().take(ts.len() - 1).all(|t| {
            match Term::try_from(t.value.clone())
                .unwrap()
                .view(&arena)
                .unwrap()
            {
                View::Str(s) => s == "hello",
                _ => false,
            }
        }));
    }

    // base'digits integer literals inside a list.
    #[test]
    fn test_integers() {
        let mut arena = Arena::new();
        let ts = lex(&mut arena, "[2'01010001111, 10'123, 36'AZ]").unwrap();
        assert!(ts.len() == 8);
        assert!(matches!(ts[1].token_id, TokenID::Int));
    }

    // String with two {..} substitutions: the literal pieces appear at
    // fixed token offsets around the spliced `++ ( ... )` sequences.
    #[test]
    fn lex_string_subs() {
        let _ = env_logger::builder().is_test(true).try_init();
        let arena = &mut Arena::new();
        let ts = lex(arena, "\"aaa{1 + 2}bbb{3 * 4}ccc\"").unwrap();
        assert_eq!(ts.len(), 18);
        let t0: Term = ts[0].value.clone().try_into().unwrap();
        let t1: Term = ts[8].value.clone().try_into().unwrap();
        let t2: Term = ts[16].value.clone().try_into().unwrap();
        assert_eq!(t0.unpack_str(arena).unwrap(), "aaa");
        assert_eq!(t1.unpack_str(arena).unwrap(), "bbb");
        assert_eq!(t2.unpack_str(arena).unwrap(), "ccc");
    }
}
1072}