arena_terms_parser/
lexer.rs

1//! Lexer for Prolog‑like terms with operator definitions.
2//!
3//! This module defines the [`TermLexer`] type, which tokenizes term input and
4//! yields [`TermToken`] values. The lexer is generated by the [`alex`] tool and
5//! includes pattern actions that build numbers, atoms, strings, dates, and manage
6//! nested parentheses and comments. It also integrates operator definitions
7//! (`OperDefs`) to recognise operator names and yields special tokens with
8//! operator table indices. See the crate documentation for details.
9//!
10//! [`TermLexer`]: struct.TermLexer
11//! [`TermToken`]: struct.TermToken
12//! [`alex`]: https://crates.io/crates/parlex-gen
13
14use crate::oper::{Assoc, Fixity, OperArg, OperDef, OperDefs};
15use crate::parser::TokenID;
16use anyhow::{Error, Result, anyhow, bail};
17use arena_terms::{Arena, Term, View};
18use chrono::{DateTime, FixedOffset, Utc};
19use smartstring::alias::String;
20use std::io::{self, BufReader, Read};
21use std::iter::FusedIterator;
22use std::mem;
23
24include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));
25
26#[derive(Debug, Clone, Copy, Default)]
27pub enum Value {
28    #[default]
29    None,
30    Term(Term),
31    Index(usize),
32}
33
34macro_rules! impl_tryfrom_value {
35    ( $( $Variant:ident => $ty:ty ),+ $(,)? ) => {
36        $(
37            impl ::core::convert::TryFrom<Value> for $ty {
38                type Error = ::anyhow::Error;
39                fn try_from(v: Value) -> ::anyhow::Result<Self> {
40                    match v {
41                        Value::$Variant(x) => Ok(x),
42                        _ => ::anyhow::bail!(
43                            "invalid value: expected {}",
44                            stringify!($Variant),
45                        ),
46                    }
47                }
48            }
49        )+
50    };
51}
52
53impl_tryfrom_value! {
54    Term => Term,
55    Index => usize,
56}
57
58impl TryFrom<Value> for Option<Term> {
59    type Error = Error;
60    fn try_from(v: Value) -> Result<Self> {
61        match v {
62            Value::None => Ok(None),
63            Value::Term(x) => Ok(Some(x)),
64            _ => ::anyhow::bail!("invalid value: expected Term or None"),
65        }
66    }
67}
68
69#[derive(Debug, Clone)]
70pub struct TermToken {
71    pub token_id: TokenID,
72    pub value: Value,
73    pub line_no: usize,
74    pub op_tab_index: Option<usize>,
75}
76
77impl TermToken {
78    #[must_use]
79    pub fn new(token_id: TokenID, value: Value, line_no: usize) -> Self {
80        Self {
81            token_id,
82            value,
83            line_no,
84            op_tab_index: None,
85        }
86    }
87}
88
89impl Token for TermToken {
90    type TokenID = TokenID;
91
92    fn token_id(&self) -> Self::TokenID {
93        self.token_id
94    }
95    fn line_no(&self) -> usize {
96        self.line_no
97    }
98}
99
100fn parse_date_to_epoch(s: &str, fmt: Option<&str>) -> Result<i64> {
101    let dt_fixed: DateTime<FixedOffset> = match fmt {
102        None => DateTime::parse_from_rfc3339(s)?,
103        Some(layout) => DateTime::parse_from_str(s, layout)?,
104    };
105    let dt_utc = dt_fixed.with_timezone(&Utc);
106    Ok(dt_utc.timestamp_millis())
107}
108
109fn parse_i64(s: &str, base: u32) -> Result<i64> {
110    if s.is_empty() {
111        return Ok(0);
112    }
113    match i64::from_str_radix(s, base) {
114        Ok(n) => Ok(n.try_into()?),
115        Err(e) if e.kind() == &std::num::IntErrorKind::InvalidDigit => {
116            bail!("digit not valid for base")
117        }
118        Err(_) => bail!("number overflowed u64"),
119    }
120}
121
122pub struct TermLexer<I>
123where
124    I: FusedIterator<Item = u8>,
125{
126    ctx: LexerCtx<I, <Self as Lexer<Arena>>::LexerData, <Self as Lexer<Arena>>::Token>,
127    pub opers: OperDefs,
128    nest_count: isize,
129    comment_nest_count: isize,
130    curly_nest_count: isize,
131    script_curly_nest_count: isize,
132    bin_count: isize,
133    bin_label: Vec<u8>,
134    date_format: String,
135}
136
137impl<I> TermLexer<I>
138where
139    I: FusedIterator<Item = u8>,
140{
141    pub fn try_new(input: I, opers: Option<OperDefs>) -> Result<Self> {
142        Ok(Self {
143            ctx: LexerCtx::try_new(input)?,
144            opers: match opers {
145                Some(opers) => opers,
146                None => OperDefs::new(),
147            },
148            nest_count: 0,
149            comment_nest_count: 0,
150            curly_nest_count: 0,
151            script_curly_nest_count: 0,
152            bin_count: 0,
153            bin_label: Vec::new(),
154            date_format: String::new(),
155        })
156    }
157
158    fn yield_id(&mut self, token_id: TokenID) {
159        //self.clear();
160        self.yield_token(TermToken {
161            token_id,
162            value: Value::None,
163            line_no: self.ctx().line_no,
164            op_tab_index: None,
165        });
166    }
167
168    fn yield_term(&mut self, token_id: TokenID, term: Term) {
169        self.yield_token(TermToken {
170            token_id,
171            value: Value::Term(term),
172            line_no: self.ctx().line_no,
173            op_tab_index: None,
174        });
175    }
176
177    fn yield_index(&mut self, token_id: TokenID, index: usize) {
178        self.yield_token(TermToken {
179            token_id,
180            value: Value::Index(index),
181            line_no: self.ctx().line_no,
182            op_tab_index: None,
183        });
184    }
185
186    fn yield_optab(&mut self, token_id: TokenID, term: Term, op_tab_index: Option<usize>) {
187        self.yield_token(TermToken {
188            token_id,
189            value: Value::Term(term),
190            line_no: self.ctx().line_no,
191            op_tab_index,
192        });
193    }
194}
195
196impl<I> Lexer<Arena> for TermLexer<I>
197where
198    I: FusedIterator<Item = u8>,
199{
200    type Input = I;
201    type LexerData = LexData;
202    type Token = TermToken;
203
204    fn ctx(&self) -> &LexerCtx<Self::Input, Self::LexerData, Self::Token> {
205        &self.ctx
206    }
207
208    fn ctx_mut(&mut self) -> &mut LexerCtx<Self::Input, Self::LexerData, Self::Token> {
209        &mut self.ctx
210    }
211
212    fn action(
213        &mut self,
214        arena: &mut Arena,
215        rule: <Self::LexerData as LexerData>::LexerRule,
216    ) -> Result<()> {
217        log::trace!(
218            "ACTION begin: mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
219            self.ctx().mode,
220            rule,
221            str::from_utf8(&self.ctx().buffer),
222            str::from_utf8(&self.ctx().buffer2),
223            str::from_utf8(&self.bin_label),
224            self.ctx().accum_flag,
225        );
226        match rule {
227            Rule::Empty => {
228                unreachable!()
229            }
230            Rule::LineComment => {}
231            Rule::CommentStart => {
232                if self.comment_nest_count == 0 {
233                    self.begin(Mode::Comment);
234                }
235                self.comment_nest_count += 1;
236            }
237            Rule::CommentEnd => {
238                self.comment_nest_count -= 1;
239                if self.comment_nest_count == 0 {
240                    self.begin(Mode::Expr);
241                }
242            }
243            Rule::CommentChar | Rule::ExprSpace | Rule::CommentAnyChar => {}
244            Rule::ExprNewLine | Rule::CommentNewLine => {
245                self.ctx_mut().line_no += 1;
246            }
247            Rule::LeftParen => {
248                self.nest_count += 1;
249                self.yield_id(TokenID::LeftParen);
250            }
251            Rule::RightParen => {
252                self.nest_count -= 1;
253                self.yield_id(TokenID::RightParen);
254            }
255            Rule::LeftBrack => {
256                self.nest_count += 1;
257                self.yield_id(TokenID::LeftBrack);
258            }
259            Rule::RightBrack => {
260                self.nest_count -= 1;
261                self.yield_id(TokenID::RightBrack);
262            }
263            Rule::Comma => {
264                self.yield_id(TokenID::Comma);
265            }
266            Rule::Pipe => {
267                self.yield_id(TokenID::Pipe);
268            }
269            Rule::RightBrace => {
270                self.nest_count -= 1;
271                self.curly_nest_count -= 1;
272                if self.curly_nest_count >= 0 {
273                    self.begin(Mode::Str);
274                    self.yield_id(TokenID::RightParen);
275                    let op_tab_idx = self.opers.lookup("++");
276                    self.yield_optab(TokenID::AtomOper, arena.atom("++"), op_tab_idx);
277                    self.clear();
278                    self.accum();
279                } else {
280                    self.yield_term(TokenID::Error, arena.str("}"));
281                }
282            }
283            Rule::Func => {
284                self.nest_count += 1;
285                self.ctx_mut().buffer.pop();
286                let s = self.take_str()?;
287                let op_tab_idx = self.opers.lookup(&s);
288                let op_tab = self.opers.get(op_tab_idx);
289
290                let atom = arena.atom(s);
291
292                if op_tab.is_oper() {
293                    let (has_empty, has_non_empty) =
294                        [Fixity::Prefix, Fixity::Infix, Fixity::Postfix]
295                            .iter()
296                            .filter_map(|f| {
297                                op_tab
298                                    .get_op_def(*f)
299                                    .map(|x| x.args.len() <= OperDef::required_arity(*f))
300                            })
301                            .fold((false, false), |(e, ne), is_empty| {
302                                if is_empty { (true, ne) } else { (e, true) }
303                            });
304
305                    match (has_empty, has_non_empty) {
306                        (false, false) => unreachable!(),
307                        (true, false) => {
308                            self.yield_optab(TokenID::AtomOper, atom, op_tab_idx);
309                            self.yield_id(TokenID::LeftParen);
310                        }
311                        (false, true) => {
312                            self.yield_optab(TokenID::FuncOper, atom, op_tab_idx);
313                        }
314                        (true, true) => bail!("arguments conflict in op defs for {:?}", atom),
315                    }
316                } else {
317                    self.yield_optab(TokenID::Func, atom, op_tab_idx);
318                }
319            }
320            Rule::Var => {
321                let s = self.take_str()?;
322                self.yield_term(TokenID::Var, arena.var(s));
323            }
324            Rule::Atom => {
325                if self.ctx().buffer == b"." && self.nest_count == 0 {
326                    self.yield_id(TokenID::Dot);
327                    self.yield_id(TokenID::End);
328                } else {
329                    let s = self.take_str()?;
330                    let op_tab_idx = self.opers.lookup(&s);
331                    let op_tab = self.opers.get(op_tab_idx);
332                    let atom = arena.atom(s);
333                    if op_tab.is_oper() {
334                        self.yield_optab(TokenID::AtomOper, atom, op_tab_idx);
335                    } else {
336                        self.yield_optab(TokenID::Atom, atom, op_tab_idx);
337                    }
338                }
339            }
340
341            Rule::DateEpoch => {
342                let mut s = self.take_str()?;
343                s.pop();
344                s.drain(0..5);
345                let s = s.trim();
346                let d = parse_i64(s, 10)?;
347                self.yield_term(TokenID::Date, arena.date(d));
348            }
349            Rule::Date => {
350                self.begin(Mode::Date);
351                self.clear();
352                self.ctx_mut().buffer2.clear();
353                self.date_format.clear();
354            }
355            Rule::Date1 => {
356                self.begin(Mode::Time);
357                self.date_format.push_str("%Y-%m-%d");
358                self.extend_buffer2_with_buffer();
359            }
360            Rule::Date2 => {
361                self.begin(Mode::Time);
362                self.date_format.push_str("%m/%d/%Y");
363                self.extend_buffer2_with_buffer();
364            }
365            Rule::Date3 => {
366                self.begin(Mode::Time);
367                self.date_format.push_str("%d-%b-%Y");
368                self.extend_buffer2_with_buffer();
369            }
370            Rule::Time1 => {
371                self.begin(Mode::Zone);
372                self.date_format.push_str("T%H:%M:%S%.f");
373                self.extend_buffer2_with_buffer();
374            }
375            Rule::Time2 => {
376                self.begin(Mode::Zone);
377                self.date_format.push_str("T%H:%M:%S");
378                self.extend_buffer2_with_buffer();
379                self.ctx_mut().buffer2.extend(b":00");
380            }
381            Rule::Time3 => {
382                self.begin(Mode::Zone);
383                self.date_format.push_str(" %H:%M:%S%.f");
384                self.extend_buffer2_with_buffer();
385            }
386            Rule::Time4 => {
387                self.begin(Mode::Zone);
388                self.date_format.push_str(" %H:%M:%S");
389                self.extend_buffer2_with_buffer();
390                self.ctx_mut().buffer2.extend(b":00");
391            }
392            Rule::Time5 => {
393                self.begin(Mode::Zone);
394                self.date_format.push_str(" %I:%M:%S%.f %p");
395                self.extend_buffer2_with_buffer();
396            }
397            Rule::Time6 => {
398                self.begin(Mode::Zone);
399                self.date_format.push_str(" %I:%M:%S %p");
400                let ctx = &mut self.ctx_mut();
401                ctx.buffer2.extend(&ctx.buffer[..ctx.buffer.len() - 3]);
402                ctx.buffer2.extend(b":00");
403                ctx.buffer2.extend(&ctx.buffer[ctx.buffer.len() - 3..]);
404            }
405            Rule::Zone1 => {
406                if self.ctx().mode == Mode::Time {
407                    self.date_format.push_str(" %H:%M:%S");
408                    self.ctx_mut().buffer2.extend(b" 00:00:00");
409                }
410                self.begin(Mode::Expr);
411                self.date_format.push_str("%:z");
412                self.ctx_mut().buffer2.extend(b"+00:00");
413                let s = self.take_str2()?;
414                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
415                self.yield_term(TokenID::Date, arena.date(d));
416            }
417            Rule::Zone2 => {
418                if self.ctx().mode == Mode::Time {
419                    self.date_format.push_str(" %H:%M:%S");
420                    self.ctx_mut().buffer2.extend(b" 00:00:00");
421                }
422                self.begin(Mode::Expr);
423                if self.ctx.buffer[0] == b' ' {
424                    self.date_format.push(' ');
425                }
426                self.date_format.push_str("%:z");
427                self.ctx_mut().buffer.pop();
428                self.extend_buffer2_with_buffer();
429                let s = self.take_str2()?;
430                let d = parse_date_to_epoch(s.trim_end(), Some(self.date_format.as_str()))?;
431                self.yield_term(TokenID::Date, arena.date(d));
432            }
433            Rule::TimeRightBrace => {
434                self.begin(Mode::Expr);
435                self.date_format.push_str(" %H:%M:%S%:z");
436                self.ctx_mut().buffer2.extend(b" 00:00:00+00:00");
437                let s = self.take_str2()?;
438                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
439                self.yield_term(TokenID::Date, arena.date(d));
440            }
441            Rule::ZoneRightBrace => {
442                self.begin(Mode::Expr);
443                self.date_format.push_str("%:z");
444                self.ctx_mut().buffer2.extend(b"+00:00");
445                let s = self.take_str2()?;
446                let d = parse_date_to_epoch(&s, Some(self.date_format.as_str()))?;
447                self.yield_term(TokenID::Date, arena.date(d));
448            }
449
450            Rule::Hex => {
451                self.begin(Mode::Hex);
452                self.ctx_mut().buffer2.clear();
453            }
454            Rule::HexSpace => {}
455            Rule::HexNewLine => {
456                self.ctx_mut().line_no += 1;
457            }
458            Rule::HexByte => {
459                let s = str::from_utf8(&self.ctx().buffer)?;
460                match u8::from_str_radix(s, 16) {
461                    Ok(b) => {
462                        self.ctx_mut().buffer2.push(b);
463                    }
464                    Err(_) => {
465                        self.yield_term(TokenID::Error, arena.str(s));
466                    }
467                }
468            }
469            Rule::HexRightBrace => {
470                self.ctx_mut().buffer.pop();
471                let bytes = self.take_bytes2();
472                self.yield_term(TokenID::Bin, arena.bin(bytes));
473                self.begin(Mode::Expr);
474            }
475            Rule::Bin => {
476                self.begin(Mode::Bin);
477            }
478            Rule::Text => {
479                self.begin(Mode::Text);
480            }
481            Rule::BinSpace | Rule::TextSpace => {}
482            Rule::BinNewLine | Rule::TextNewLine => {
483                self.ctx_mut().line_no += 1;
484            }
485            r @ (Rule::BinCount | Rule::TextCount) => {
486                let s = str::from_utf8(&self.ctx().buffer)?;
487                let mut s = String::from(s.trim());
488                if &s[s.len() - 1..] == "\n" {
489                    self.ctx_mut().line_no += 1;
490                }
491                if &s[s.len() - 1..] == ":" {
492                    s.pop();
493                }
494                self.bin_count = s.parse()?;
495                if self.bin_count > 0 {
496                    if r == Rule::BinCount {
497                        self.begin(Mode::BinCount);
498                    } else {
499                        self.begin(Mode::TextCount);
500                    }
501                    self.clear();
502                    self.accum();
503                }
504            }
505            r @ (Rule::BinCountAnyChar | Rule::TextCountAnyChar) => {
506                self.bin_count -= 1;
507                if self.bin_count == 0 {
508                    self.extend_buffer2_with_buffer();
509                    self.clear();
510                    if r == Rule::BinCountAnyChar {
511                        self.begin(Mode::Bin);
512                    } else {
513                        self.begin(Mode::Text);
514                    }
515                }
516            }
517            r @ (Rule::BinCountNLChar | Rule::TextCountNewLine) => {
518                self.ctx_mut().line_no += 1;
519                if self.ctx_mut().buffer[0] == b'\r' {
520                    self.ctx_mut().buffer.remove(0);
521                }
522                self.bin_count -= 1;
523                if self.bin_count == 0 {
524                    self.extend_buffer2_with_buffer();
525                    self.clear();
526                    if r == Rule::BinCountNLChar {
527                        self.begin(Mode::Bin);
528                    } else {
529                        self.begin(Mode::Text);
530                    }
531                }
532            }
533            r @ (Rule::BinRightBrace | Rule::TextRightBrace) => {
534                if r == Rule::BinRightBrace {
535                    let bytes = self.take_bytes2();
536                    self.yield_term(TokenID::Bin, arena.bin(bytes));
537                } else {
538                    let s = self.take_str2()?;
539                    self.yield_term(TokenID::Str, arena.str(s));
540                }
541                self.begin(Mode::Expr);
542            }
543            r @ (Rule::BinLabelStart | Rule::TextLabelStart) => {
544                self.bin_label.clear();
545                let len = self.ctx().buffer.len();
546                if self.ctx_mut().buffer[len - 1] == b'\n' {
547                    self.ctx_mut().line_no += 1;
548                    self.bin_label.push(b'\n');
549                    self.ctx_mut().buffer.pop();
550                    let len = self.ctx().buffer.len();
551                    if self.ctx_mut().buffer[len - 1] == b'\r' {
552                        self.bin_label.insert(0, b'\r');
553                        self.ctx_mut().buffer.pop();
554                    }
555                } else {
556                    let len = self.ctx().buffer.len();
557                    let b = self.ctx().buffer[len - 1];
558                    self.bin_label.push(b);
559                    self.ctx_mut().buffer.pop();
560                }
561
562                let buf = mem::take(&mut self.ctx_mut().buffer);
563                self.bin_label.extend(buf);
564
565                if r == Rule::BinLabelStart {
566                    self.begin(Mode::BinLabel);
567                } else {
568                    self.begin(Mode::TextLabel);
569                }
570            }
571            r @ (Rule::BinLabelEnd | Rule::TextLabelEnd) => {
572                if self.ctx_mut().buffer[0] != b':' {
573                    self.ctx_mut().line_no += 1;
574                }
575                if self.ctx().buffer == self.bin_label {
576                    if r == Rule::BinLabelEnd {
577                        self.begin(Mode::Bin);
578                    } else {
579                        self.begin(Mode::Text);
580                    }
581                } else {
582                    if r == Rule::TextLabelEnd && self.ctx_mut().buffer[0] == b'\r' {
583                        self.ctx_mut().buffer.remove(0);
584                    }
585                    self.extend_buffer2_with_buffer();
586                }
587            }
588            r @ (Rule::BinLabelNLChar | Rule::TextLabelNewLine) => {
589                self.ctx_mut().line_no += 1;
590                if r == Rule::TextLabelNewLine && self.ctx_mut().buffer[0] == b'\r' {
591                    self.ctx_mut().buffer.remove(0);
592                }
593                self.extend_buffer2_with_buffer();
594            }
595            Rule::BinLabelAnyChar | Rule::TextLabelAnyChar => {
596                self.extend_buffer2_with_buffer();
597            }
598            Rule::LeftBrace => {
599                self.begin(Mode::Script);
600                self.clear();
601                self.accum();
602            }
603            Rule::ScriptNotBraces => {}
604            Rule::ScriptLeftBrace => {
605                self.script_curly_nest_count += 1;
606            }
607            Rule::ScriptRightBrace => {
608                if self.script_curly_nest_count != 0 {
609                    self.script_curly_nest_count -= 1;
610                } else {
611                    self.ctx_mut().buffer.pop();
612                    let s = self.take_str()?;
613                    self.yield_term(TokenID::Str, arena.str(s));
614                    self.begin(Mode::Expr);
615                }
616            }
617            Rule::ScriptNewLine => {
618                self.ctx_mut().line_no += 1;
619            }
620            Rule::HexConst => {
621                self.ctx_mut().buffer.drain(0..2);
622                let s = self.take_str()?;
623                let val = parse_i64(s.as_str(), 16)?;
624                self.yield_term(TokenID::Int, arena.int(val));
625            }
626            Rule::BaseConst => {
627                let s = self.take_str()?;
628                let (base_str, digits) =
629                    s.split_once('\'').ok_or(anyhow!("missing ' separator"))?;
630                let base: u32 = base_str.parse().map_err(|_| anyhow!("invalid base"))?;
631                let val = parse_i64(digits, base)?;
632                self.yield_term(TokenID::Int, arena.int(val));
633            }
634            Rule::CharHex => {
635                let mut s = self.take_str()?;
636                s.drain(0..4);
637                let val = parse_i64(s.as_str(), 16)?;
638                self.yield_term(TokenID::Int, arena.int(val));
639            }
640            Rule::CharOct => {
641                let mut s = self.take_str()?;
642                s.drain(0..3);
643                let val = parse_i64(s.as_str(), 8)?;
644                self.yield_term(TokenID::Int, arena.int(val));
645            }
646            Rule::CharNewLine1 | Rule::CharNewLine2 | Rule::CharNewLine4 => {
647                self.ctx_mut().line_no += 1;
648                self.yield_term(TokenID::Int, arena.int('\n' as i64));
649            }
650            Rule::CharNotBackslash => {
651                let mut s = self.take_str()?;
652                s.drain(0..2);
653                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64;
654                self.yield_term(TokenID::Int, arena.int(val));
655            }
656            Rule::CharCtrl => {
657                let mut s = self.take_str()?;
658                s.drain(0..4);
659                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64 - '@' as i64;
660                self.yield_term(TokenID::Int, arena.int(val));
661            }
662            Rule::CharDel1 | Rule::CharDel2 => {
663                self.yield_term(TokenID::Int, arena.int('\x7F' as i64));
664            }
665            Rule::CharEsc => {
666                self.yield_term(TokenID::Int, arena.int('\x1B' as i64));
667            }
668            Rule::CharBell => {
669                self.yield_term(TokenID::Int, arena.int('\u{0007}' as i64));
670            }
671            Rule::CharBackspace => {
672                self.yield_term(TokenID::Int, arena.int('\u{0008}' as i64));
673            }
674            Rule::CharFormFeed => {
675                self.yield_term(TokenID::Int, arena.int('\u{000C}' as i64));
676            }
677            Rule::CharNewLine3 => {
678                self.yield_term(TokenID::Int, arena.int('\n' as i64));
679            }
680            Rule::CharCarriageReturn => {
681                self.yield_term(TokenID::Int, arena.int('\r' as i64));
682            }
683            Rule::CharTab => {
684                self.yield_term(TokenID::Int, arena.int('\t' as i64));
685            }
686            Rule::CharVerticalTab => {
687                self.yield_term(TokenID::Int, arena.int('\u{000B}' as i64));
688            }
689            Rule::CharAny => {
690                let mut s = self.take_str()?;
691                s.drain(0..3);
692                let val = s.chars().next().ok_or(anyhow!("invalid char"))? as i64;
693                self.yield_term(TokenID::Int, arena.int(val));
694            }
695            Rule::OctConst => {
696                let s = self.take_str()?;
697                let val = parse_i64(s.as_str(), 8)?;
698                self.yield_term(TokenID::Int, arena.int(val));
699            }
700            Rule::DecConst => {
701                let s = self.take_str()?;
702                let val = parse_i64(s.as_str(), 10)?;
703                self.yield_term(TokenID::Int, arena.int(val));
704            }
705            Rule::FPConst => {
706                let s = self.take_str()?;
707                let val: f64 = s.parse()?;
708                self.yield_term(TokenID::Real, arena.real(val));
709            }
710            Rule::DoubleQuote => {
711                self.begin(Mode::Str);
712                self.clear();
713                self.accum();
714            }
715            Rule::SingleQuote => {
716                self.begin(Mode::Atom);
717                self.clear();
718                self.accum();
719            }
720            Rule::StrAtomCharHex => {
721                let len = self.ctx().buffer.len();
722                let b: u8 = parse_i64(str::from_utf8(&self.ctx_mut().buffer[len - 2..])?, 16)?
723                    .try_into()?;
724                self.ctx_mut().buffer.truncate(len - 4);
725                self.ctx_mut().buffer.push(b);
726            }
727            Rule::StrAtomCharOct => {
728                let slash_pos = self.ctx().buffer.iter().rposition(|&b| b == b'\\').unwrap();
729                let b: u8 = parse_i64(str::from_utf8(&self.ctx().buffer[slash_pos + 1..])?, 8)?
730                    .try_into()?;
731                self.ctx_mut().buffer.truncate(slash_pos);
732                self.ctx_mut().buffer.push(b);
733            }
734            Rule::StrAtomCharCtrl => {
735                let len = self.ctx().buffer.len();
736                let b = self.ctx_mut().buffer[len - 1] - b'@';
737                self.ctx_mut().buffer.truncate(len - 3);
738                self.ctx_mut().buffer.push(b);
739            }
740            Rule::StrAtomCharDel1 => {
741                let idx = self.ctx().buffer.len() - 2;
742                self.ctx_mut().buffer.truncate(idx);
743                self.ctx_mut().buffer.push(b'\x7F');
744            }
745            Rule::StrAtomCharDel2 => {
746                let idx = self.ctx().buffer.len() - 3;
747                self.ctx_mut().buffer.truncate(idx);
748                self.ctx_mut().buffer.push(b'\x7F');
749            }
750            Rule::StrAtomCharEsc => {
751                let idx = self.ctx().buffer.len() - 2;
752                self.ctx_mut().buffer.truncate(idx);
753                self.ctx_mut().buffer.push(b'\x1B');
754            }
755            Rule::StrAtomCharBell => {
756                let idx = self.ctx().buffer.len() - 2;
757                self.ctx_mut().buffer.truncate(idx);
758                self.ctx_mut().buffer.push(b'\x07');
759            }
760            Rule::StrAtomCharBackspace => {
761                let idx = self.ctx().buffer.len() - 2;
762                self.ctx_mut().buffer.truncate(idx);
763                self.ctx_mut().buffer.push(b'\x08');
764            }
765            Rule::StrAtomCharFormFeed => {
766                let idx = self.ctx().buffer.len() - 2;
767                self.ctx_mut().buffer.truncate(idx);
768                self.ctx_mut().buffer.push(b'\x0C');
769            }
770            Rule::StrAtomCharNewLine => {
771                let idx = self.ctx().buffer.len() - 2;
772                self.ctx_mut().buffer.truncate(idx);
773                self.ctx_mut().buffer.push(b'\n');
774            }
775            Rule::StrAtomCharCarriageReturn => {
776                let idx = self.ctx().buffer.len() - 2;
777                self.ctx_mut().buffer.truncate(idx);
778                self.ctx_mut().buffer.push(b'\r');
779            }
780            Rule::StrAtomCharTab => {
781                let idx = self.ctx().buffer.len() - 2;
782                self.ctx_mut().buffer.truncate(idx);
783                self.ctx_mut().buffer.push(b'\t');
784            }
785            Rule::StrAtomVerticalTab => {
786                let idx = self.ctx().buffer.len() - 2;
787                self.ctx_mut().buffer.truncate(idx);
788                self.ctx_mut().buffer.push(b'\x0B');
789            }
790            Rule::StrAtomCharSkipNewLine => {
791                self.ctx_mut().line_no += 1;
792                self.ctx_mut().buffer.pop();
793                let idx = self.ctx().buffer.len() - 1;
794                if self.ctx_mut().buffer[idx] == b'\r' {
795                    self.ctx_mut().buffer.pop();
796                }
797                self.ctx_mut().buffer.pop();
798            }
799            Rule::StrAtomCharAny | Rule::StrAtomCharBackslash => {
800                let idx = self.ctx().buffer.len() - 2;
801                self.ctx_mut().buffer.remove(idx);
802            }
803            Rule::StrChar | Rule::AtomChar | Rule::StrAtomCarriageReturn => {}
804            Rule::StrDoubleQuote => {
805                self.begin(Mode::Expr);
806                self.ctx_mut().buffer.pop();
807                let s = self.take_str()?;
808                self.yield_term(TokenID::Str, arena.str(s));
809            }
810            Rule::AtomSingleQuote => {
811                self.begin(Mode::Expr);
812                self.ctx_mut().buffer.pop();
813                let s = self.take_str()?;
814                self.yield_term(TokenID::Atom, arena.atom(s));
815            }
816            Rule::AtomLeftParen => {
817                self.begin(Mode::Expr);
818                self.nest_count += 1;
819                let mut s = self.take_str()?;
820                s.truncate(s.len() - 2);
821                self.yield_term(TokenID::Func, arena.atom(s));
822            }
823            Rule::AtomLeftBrace => {}
824            Rule::StrLeftBrace => {
825                self.begin(Mode::Expr);
826                self.nest_count += 1;
827                self.curly_nest_count += 1;
828                let mut s = self.take_str()?;
829                s.pop();
830                self.yield_term(TokenID::Str, arena.str(s));
831                let op_tab_idx = self.opers.lookup("++");
832                self.yield_optab(TokenID::AtomOper, arena.atom("++"), op_tab_idx);
833                self.yield_id(TokenID::LeftParen);
834            }
835            Rule::StrAtomNewLine => {
836                self.ctx_mut().line_no += 1;
837            }
838            Rule::Error => {
839                let s = self.take_str()?;
840                self.yield_term(TokenID::Error, arena.str(s));
841            }
842            Rule::End => {
843                if self.ctx().mode == Mode::Expr {
844                    self.yield_id(TokenID::End);
845                } else {
846                    self.yield_term(TokenID::Error, arena.str("<END>"));
847                }
848            }
849        }
850
851        log::trace!(
852            "ACTION end:   mode {:?}, rule {:?}, buf {:?}, buf2 {:?}, label {:?}, accum {}",
853            self.ctx().mode,
854            rule,
855            str::from_utf8(&self.ctx().buffer),
856            str::from_utf8(&self.ctx().buffer2),
857            str::from_utf8(&self.bin_label),
858            self.ctx().accum_flag,
859        );
860
861        Ok(())
862    }
863}
864
865#[cfg(test)]
866mod tests {
867    use super::*;
868
869    fn lex(arena: &mut Arena, s: &str) -> Result<Vec<TermToken>> {
870        let mut lx = TermLexer::try_new(s.bytes().fuse(), Some(OperDefs::new()))?;
871        Ok(lx.try_collect(arena)?)
872    }
873
874    #[test]
875    fn test_dates() {
876        let _ = env_logger::builder().is_test(true).try_init();
877        let mut arena = Arena::new();
878        const DATES: &[(&str, u8)] = &[
879            ("date{-5381856000000}", 0),
880            ("date{-5381830320000}", 1),
881            ("date{-5381830311000}", 2),
882            ("date{-5381830310999}", 3),
883            ("date{1799-06-16}", 0),
884            ("date{1799-06-16Z}", 0),
885            ("date{1799-06-16 Z}", 0),
886            ("date{1799-06-16-00:00}", 0),
887            ("date{1799-06-16 -00:00}", 0),
888            ("date{1799-06-16T07:08}", 1),
889            ("date{1799-06-16T07:08:09}", 2),
890            ("date{1799-06-16T07:08:09Z}", 2),
891            ("date{1799-06-16T07:08:09.001Z}", 3),
892            ("date{1799-06-16T07:08:09 Z}", 2),
893            ("date{1799-06-16T07:08:09.001 Z}", 3),
894            ("date{1799-06-16T07:08:09+00:00}", 2),
895            ("date{1799-06-16T07:08:09.001+00:00}", 3),
896            ("date{1799-06-16T07:08:09 +00:00}", 2),
897            ("date{1799-06-16T07:08:09.001 +00:00}", 3),
898            ("date{1799-06-16T07:08:09Z}", 2),
899            ("date{1799-06-16T07:08:09.001Z}", 3),
900            ("date{1799-06-16 07:08:09 Z}", 2),
901            ("date{1799-06-16T07:08:09.001 Z}", 3),
902            ("date{1799-06-16 07:08:09+00:00}", 2),
903            ("date{1799-06-16T07:08:09.001+00:00}", 3),
904            ("date{1799-06-16 07:08:09 +00:00}", 2),
905            ("date{1799-06-16 07:08:09.001 +00:00}", 3),
906            ("date{1799-06-16T07:08Z}", 1),
907            ("date{1799-06-16T07:08 Z  }", 1),
908            ("date{  1799-06-16T07:08+00:00}", 1),
909            ("date{ 1799-06-16T07:08 +00:00   }", 1),
910            ("date{06/16/1799Z}", 0),
911            ("date{06/16/1799 Z}", 0),
912            ("date{06/16/1799+00:00}", 0),
913            ("date{06/16/1799 +00:00}", 0),
914            ("date{06/16/1799 07:08Z}", 1),
915            ("date{06/16/1799 07:08:09Z}", 2),
916            ("date{06/16/1799 07:08:09.001Z}", 3),
917            ("date{06/16/1799 07:08 Z}", 1),
918            ("date{06/16/1799 07:08:09 Z}", 2),
919            ("date{06/16/1799 07:08:09.001 Z}", 3),
920            ("date{06/16/1799 07:08+00:00}", 1),
921            ("date{06/16/1799 07:08:09+00:00}", 2),
922            ("date{06/16/1799 07:08:09.001+00:00}", 3),
923            ("date{06/16/1799 07:08 +00:00}", 1),
924            ("date{06/16/1799 07:08:09 +00:00}", 2),
925            ("date{06/16/1799 07:08:09.001 +00:00}", 3),
926            ("date{16-Jun-1799Z}", 0),
927            ("date{16-jun-1799 Z}", 0),
928            ("date{16-JUN-1799+00:00}", 0),
929            ("date{16-Jun-1799 +00:00}", 0),
930            ("date{16-Jun-1799 07:08Z}", 1),
931            ("date{16-JUN-1799 07:08:09Z}", 2),
932            ("date{16-Jun-1799 07:08:09.001Z}", 3),
933            ("date{16-Jun-1799 07:08 Z}", 1),
934            ("date{16-jun-1799 07:08:09 Z}", 2),
935            ("date{16-Jun-1799 07:08:09.001 Z}", 3),
936            ("date{16-Jun-1799 07:08+00:00}", 1),
937            ("date{16-Jun-1799 07:08:09+00:00}", 2),
938            ("date{16-Jun-1799 07:08:09.001+00:00}", 3),
939            ("date{16-Jun-1799 07:08 +00:00}", 1),
940            ("date{16-Jun-1799 07:08:09 +00:00}", 2),
941            ("date{16-Jun-1799 07:08:09.001 +00:00}", 3),
942        ];
943        for (s, k) in DATES {
944            let mut ts = lex(&mut arena, s).unwrap();
945            let tok = ts.remove(0);
946            assert_eq!(tok.token_id, TokenID::Date);
947            let term = Term::try_from(tok.value).unwrap();
948            let d = term.unpack_date(&arena).unwrap();
949            assert_eq!(
950                d,
951                match k {
952                    0 => -5381856000000,
953                    1 => -5381830320000,
954                    2 => -5381830311000,
955                    3 => -5381830310999,
956                    _ => unreachable!(),
957                }
958            );
959        }
960    }
961
962    #[test]
963    fn test_atoms() {
964        let mut arena = Arena::new();
965        let ts = lex(&mut arena, "\na+foo-x '^&%^&%^&%''abc' 'AAA'").unwrap();
966        dbg!(&ts);
967        assert!(ts.len() == 9);
968        assert!(ts.iter().take(ts.len() - 1).all(|t| {
969            t.line_no == 2
970                && matches!(
971                    Term::try_from(t.value.clone())
972                        .unwrap()
973                        .view(&arena)
974                        .unwrap(),
975                    View::Atom(_)
976                )
977        }));
978    }
979
980    #[test]
981    fn test_bin() {
982        let mut arena = Arena::new();
983        let ts = lex(&mut arena, "% single line comment\nbin{3:\x00\x01\x02 eob:\x00\x01:aaa\x02:eob eob\n\x00\neob eob\r\n\x00\r\neob\r\n}\r\nhex{   0203 0405 FE }").unwrap();
984        dbg!(&ts);
985        assert!(ts.len() == 3);
986        assert!(matches!(
987            Term::try_from(ts[0].value.clone())
988                .unwrap()
989                .view(&arena)
990                .unwrap(),
991            View::Bin(_)
992        ));
993        match Term::try_from(ts[0].value.clone())
994            .unwrap()
995            .view(&arena)
996            .unwrap()
997        {
998            View::Bin(bytes) => assert!(bytes == &[0, 1, 2, 0, 1, 58, 97, 97, 97, 2, 0, 0,]),
999            _ => unreachable!(),
1000        }
1001    }
1002
1003    #[test]
1004    fn test_text() {
1005        let mut arena = Arena::new();
1006        let ts = lex(&mut arena, "/* single /* line */ comment */\ntext{3:abc eob:de:aaa:eob eob\n0\neob eob\r\n1\r\neob\r\n}\r\n").unwrap();
1007        dbg!(&ts);
1008        assert!(ts.len() == 2);
1009        assert!(matches!(
1010            Term::try_from(ts[0].value.clone())
1011                .unwrap()
1012                .view(&arena)
1013                .unwrap(),
1014            View::Str(_)
1015        ));
1016        match Term::try_from(ts[0].value.clone())
1017            .unwrap()
1018            .view(&arena)
1019            .unwrap()
1020        {
1021            View::Str(s) => assert!(s == "abcde:aaa01"),
1022            _ => unreachable!(),
1023        }
1024    }
1025
1026    #[test]
1027    fn test_texts() {
1028        let mut arena = Arena::new();
1029        let ts = lex(&mut arena, "/* single [ ( { /* line */ comment */\n\"hello\" {hello} text{5:hello} text{e:hello:e} text{e:h:e e:e:e 2:ll e:o:e} text{\ne\nhello\ne}").unwrap();
1030        dbg!(&ts);
1031        assert!(ts.len() == 7);
1032        assert!(matches!(
1033            Term::try_from(ts[0].value.clone())
1034                .unwrap()
1035                .view(&arena)
1036                .unwrap(),
1037            View::Str(_)
1038        ));
1039        assert!(ts.iter().take(ts.len() - 1).all(|t| {
1040            match Term::try_from(t.value.clone())
1041                .unwrap()
1042                .view(&arena)
1043                .unwrap()
1044            {
1045                View::Str(s) => s == "hello",
1046                _ => false,
1047            }
1048        }));
1049    }
1050
1051    #[test]
1052    fn test_integers() {
1053        let mut arena = Arena::new();
1054        let ts = lex(&mut arena, "[2'01010001111, 10'123, 36'AZ]").unwrap();
1055        assert!(ts.len() == 8);
1056        assert!(matches!(ts[1].token_id, TokenID::Int));
1057    }
1058
1059    #[test]
1060    fn lex_string_subs() {
1061        let _ = env_logger::builder().is_test(true).try_init();
1062        let arena = &mut Arena::new();
1063        let ts = lex(arena, "\"aaa{1 + 2}bbb{3 * 4}ccc\"").unwrap();
1064        assert_eq!(ts.len(), 18);
1065        let t0: Term = ts[0].value.clone().try_into().unwrap();
1066        let t1: Term = ts[8].value.clone().try_into().unwrap();
1067        let t2: Term = ts[16].value.clone().try_into().unwrap();
1068        assert_eq!(t0.unpack_str(arena).unwrap(), "aaa");
1069        assert_eq!(t1.unpack_str(arena).unwrap(), "bbb");
1070        assert_eq!(t2.unpack_str(arena).unwrap(), "ccc");
1071    }
1072}