dedukti_parse/lex.rs

//! Lexing.

use crate::Symb;
use core::fmt::{self, Display};
use logos::{Filter, Lexer, Logos};

/// A token generated by the lexer.
#[derive(Logos, Debug, PartialEq, Eq)]
#[logos(type S = &str)]
pub enum Token<S> {
    /// definition
    #[token("def")]
    Def,

    /// theorem
    #[token("thm")]
    Thm,

    /// opening bracket
    #[token("[")]
    LBrk,

    /// closing bracket
    #[token("]")]
    RBrk,

    /// opening parenthesis
    #[token("(")]
    LPar,

    /// closing parenthesis
    #[token(")")]
    RPar,

    /// has type
    #[token(":")]
    Colon,

    /// is defined as
    #[token(":=")]
    ColonEq,

    /// product
    #[token("->")]
    Arrow,

    /// abstraction
    #[token("=>")]
    FatArrow,

    /// rewrites to
    #[token("-->")]
    LongArrow,

    /// separate variables in rewrite rule contexts
    #[token(",")]
    Comma,

    /// terminate command
    #[token(".")]
    Dot,

    /// symbol
    #[regex("[a-zA-Z0-9_!?][a-zA-Z0-9_!?']*", symb)]
    #[token("{|", moustache)]
    Symb(Symb<S>),

    /// unclosed comments (the number indicates how many comments are still open)
    #[token("(;", comment1)]
    Comment(usize),

    /// unrecognised token
    #[regex(r"[ \t\n\f]+", logos::skip)]
    #[error]
    Error,
}

impl<S: Display> Display for Token<S> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Def => "def".fmt(f),
            Self::Thm => "thm".fmt(f),
            Self::LBrk => "[".fmt(f),
            Self::RBrk => "]".fmt(f),
            Self::LPar => "(".fmt(f),
            Self::RPar => ")".fmt(f),
            Self::Colon => ":".fmt(f),
            Self::ColonEq => ":=".fmt(f),
            Self::Arrow => "->".fmt(f),
            Self::FatArrow => "=>".fmt(f),
            Self::LongArrow => "-->".fmt(f),
            Self::Comma => ",".fmt(f),
            Self::Dot => ".".fmt(f),
            Self::Symb(s) => s.fmt(f),
            Self::Comment(_) => " ".fmt(f),
            Self::Error => Err(Default::default()),
        }
    }
}
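
// A quick check (sketch) that `Display` prints keyword and punctuation tokens
// verbatim, as the match arms above suggest.
#[test]
fn display_tokens() {
    assert_eq!(Token::<&str>::Def.to_string(), "def");
    assert_eq!(Token::<&str>::ColonEq.to_string(), ":=");
    assert_eq!(Token::<&str>::LongArrow.to_string(), "-->");
}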

fn symb<'s>(lex: &mut Lexer<'s, Token<&'s str>>) -> Option<Symb<&'s str>> {
    let mut symb = Symb::new(lex.slice());

    // character classes for the head and tail of identifiers (mirroring the `Symb` regex)
    let ih = |c| matches!(c, 'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | '_' | '!' | '?');
    let it = |c| matches!(c, 'a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9' | '_' | '!' | '?' | '\'');

    while let Some(after_dot) = lex.remainder().strip_prefix('.') {
        let len = if let Some(tail) = after_dot.strip_prefix(ih) {
            1 + tail.find(|c| !it(c)).unwrap_or(tail.len())
        } else if let Some(after_moustache) = after_dot.strip_prefix("{|") {
            2 + after_moustache.find("|}")? + 2
        } else {
            break;
        };

        symb.push(&after_dot[..len]);
        lex.bump(1 + len); // eat the dot as well as the new component
    }

    Some(symb)
}

fn moustache<'s>(lex: &mut Lexer<'s, Token<&'s str>>) -> Option<Symb<&'s str>> {
    let len = lex.remainder().find("|}")?;
    lex.bump(len + 2); // also consume the closing `|}`
    symb(lex)
}
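
// A small sanity check (sketch) for qualified symbols: it only inspects the
// lexer's slice, so it makes no assumption about the internal structure of `Symb`.
#[test]
fn symb_qualified() {
    let mut lex = Token::lexer("mod.{|odd id|}.x .");
    assert!(matches!(lex.next(), Some(Token::Symb(_))));
    // the whole qualified identifier is consumed as a single token
    assert_eq!(lex.slice(), "mod.{|odd id|}.x");
    assert_eq!(lex.next(), Some(Token::Dot));
    assert_eq!(lex.next(), None);
}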

fn comment1<'s>(lex: &mut Lexer<'s, Token<&'s str>>) -> Filter<usize> {
    match comment(lex, 1) {
        0 => Filter::Skip,
        n => Filter::Emit(n),
    }
}

/// Lex inside a comment until end of comment or input, return number of open comments.
pub(crate) fn comment<'s>(lex: &mut Lexer<'s, Token<&'s str>>, mut open: usize) -> usize {
    let prefix: &[_] = &['(', ';'];
    while open > 0 {
        // go to the first occurrence of either '(' or ';'
        match lex.remainder().find(prefix) {
            Some(offset) => lex.bump(offset),
            None => {
                lex.bump(lex.remainder().len());
                return open;
            }
        };
        if lex.remainder().starts_with("(;") {
            open += 1;
            lex.bump(2);
        } else if lex.remainder().starts_with(";)") {
            open -= 1;
            lex.bump(2);
        } else {
            lex.bump(1);
        }
    }
    0
}

#[test]
fn comment_open() {
    let s = "opening .. (; closing .. ;) still one open ..";
    assert!(matches!(comment1(&mut Token::lexer(s)), Filter::Emit(1)));

    let s = "closing .. ;) none open";
    assert!(matches!(comment1(&mut Token::lexer(s)), Filter::Skip));
}
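
// An end-to-end sketch of lexing a declaration; the expected `Symb` values assume
// that `Symb::new` on an unqualified name compares equal to the symbol produced
// by the `symb` callback above.
#[test]
fn lex_declaration() {
    let tokens: Vec<_> = Token::lexer("def id : A -> A.").collect();
    assert_eq!(
        tokens,
        [
            Token::Def,
            Token::Symb(Symb::new("id")),
            Token::Colon,
            Token::Symb(Symb::new("A")),
            Token::Arrow,
            Token::Symb(Symb::new("A")),
            Token::Dot,
        ]
    );
}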