Skip to main content

oak_coq/lexer/
mod.rs

1use crate::{kind::CoqSyntaxKind, language::CoqLanguage};
2use oak_core::{
3    Lexer, LexerState, TextEdit,
4    lexer::{LexOutput, LexerCache},
5    source::Source,
6};
7
8/// A lexer for the Coq programming language.
9#[derive(Clone, Debug)]
10pub struct CoqLexer<'config> {
11    #[allow(dead_code)]
12    config: &'config CoqLanguage,
13}
14
15type State<'a, S> = LexerState<'a, S, CoqLanguage>;
16
17impl<'config> CoqLexer<'config> {
18    /// Creates a new CoqLexer with the given configuration.
19    ///
20    /// # Arguments
21    ///
22    /// * `config` - A reference to the CoqLanguage configuration
23    ///
24    /// # Returns
25    ///
26    /// A new CoqLexer instance
27    pub fn new(config: &'config CoqLanguage) -> Self {
28        Self { config }
29    }
30
31    fn lex_internal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
32        while let Some(c) = state.peek() {
33            let start = state.get_position();
34            match c {
35                ' ' | '\t' | '\r' => {
36                    state.advance(1);
37                    while let Some(' ' | '\t' | '\r') = state.peek() {
38                        state.advance(1);
39                    }
40                    state.add_token(CoqSyntaxKind::Whitespace, start, state.get_position());
41                }
42                '\n' => {
43                    state.advance(1);
44                    state.add_token(CoqSyntaxKind::Newline, start, state.get_position());
45                }
46                '(' => {
47                    state.advance(1);
48                    if let Some('*') = state.peek() {
49                        state.advance(1);
50                        self.lex_comment(state, start);
51                    }
52                    else {
53                        state.add_token(CoqSyntaxKind::LeftParen, start, state.get_position());
54                    }
55                }
56                ')' => {
57                    state.advance(1);
58                    state.add_token(CoqSyntaxKind::RightParen, start, state.get_position());
59                }
60                '[' => {
61                    state.advance(1);
62                    state.add_token(CoqSyntaxKind::LeftBracket, start, state.get_position());
63                }
64                ']' => {
65                    state.advance(1);
66                    state.add_token(CoqSyntaxKind::RightBracket, start, state.get_position());
67                }
68                '{' => {
69                    state.advance(1);
70                    state.add_token(CoqSyntaxKind::LeftBrace, start, state.get_position());
71                }
72                '}' => {
73                    state.advance(1);
74                    state.add_token(CoqSyntaxKind::RightBrace, start, state.get_position());
75                }
76                ',' => {
77                    state.advance(1);
78                    state.add_token(CoqSyntaxKind::Comma, start, state.get_position());
79                }
80                ';' => {
81                    state.advance(1);
82                    state.add_token(CoqSyntaxKind::Semicolon, start, state.get_position());
83                }
84                '.' => {
85                    state.advance(1);
86                    state.add_token(CoqSyntaxKind::Dot, start, state.get_position());
87                }
88                ':' => {
89                    state.advance(1);
90                    if let Some(':') = state.peek() {
91                        state.advance(1);
92                        if let Some('=') = state.peek() {
93                            state.advance(1);
94                            state.add_token(CoqSyntaxKind::DoubleColonEqual, start, state.get_position());
95                        }
96                        else {
97                            state.add_token(CoqSyntaxKind::DoubleColon, start, state.get_position());
98                        }
99                    }
100                    else if let Some('=') = state.peek() {
101                        state.advance(1);
102                        state.add_token(CoqSyntaxKind::ColonEqual, start, state.get_position());
103                    }
104                    else {
105                        state.add_token(CoqSyntaxKind::Colon, start, state.get_position());
106                    }
107                }
108                '=' => {
109                    state.advance(1);
110                    if let Some('>') = state.peek() {
111                        state.advance(1);
112                        state.add_token(CoqSyntaxKind::DoubleArrow, start, state.get_position());
113                    }
114                    else {
115                        state.add_token(CoqSyntaxKind::Equal, start, state.get_position());
116                    }
117                }
118                '<' => {
119                    state.advance(1);
120                    if let Some('-') = state.peek() {
121                        state.advance(1);
122                        state.add_token(CoqSyntaxKind::LeftArrow, start, state.get_position());
123                    }
124                    else if let Some('>') = state.peek() {
125                        state.advance(1);
126                        state.add_token(CoqSyntaxKind::DoubleArrow, start, state.get_position());
127                    }
128                    else if let Some('=') = state.peek() {
129                        state.advance(1);
130                        state.add_token(CoqSyntaxKind::LessEqual, start, state.get_position());
131                    }
132                    else {
133                        state.add_token(CoqSyntaxKind::Less, start, state.get_position());
134                    }
135                }
136                '>' => {
137                    state.advance(1);
138                    if let Some('=') = state.peek() {
139                        state.advance(1);
140                        state.add_token(CoqSyntaxKind::GreaterEqual, start, state.get_position());
141                    }
142                    else {
143                        state.add_token(CoqSyntaxKind::Greater, start, state.get_position());
144                    }
145                }
146                '|' => {
147                    state.advance(1);
148                    if let Some('-') = state.peek() {
149                        state.advance(1);
150                        state.add_token(CoqSyntaxKind::Turnstile, start, state.get_position());
151                    }
152                    else {
153                        state.add_token(CoqSyntaxKind::Pipe, start, state.get_position());
154                    }
155                }
156                '-' => {
157                    state.advance(1);
158                    if let Some('>') = state.peek() {
159                        state.advance(1);
160                        state.add_token(CoqSyntaxKind::Arrow, start, state.get_position());
161                    }
162                    else {
163                        state.add_token(CoqSyntaxKind::Minus, start, state.get_position());
164                    }
165                }
166                '+' => {
167                    state.advance(1);
168                    state.add_token(CoqSyntaxKind::Plus, start, state.get_position());
169                }
170                '*' => {
171                    state.advance(1);
172                    state.add_token(CoqSyntaxKind::Star, start, state.get_position());
173                }
174                '/' => {
175                    state.advance(1);
176                    if let Some('\\') = state.peek() {
177                        state.advance(1);
178                        state.add_token(CoqSyntaxKind::And, start, state.get_position());
179                    }
180                    else {
181                        state.add_token(CoqSyntaxKind::Slash, start, state.get_position());
182                    }
183                }
184                '\\' => {
185                    state.advance(1);
186                    if let Some('/') = state.peek() {
187                        state.advance(1);
188                        state.add_token(CoqSyntaxKind::Or, start, state.get_position());
189                    }
190                    else {
191                        state.add_token(CoqSyntaxKind::Backslash, start, state.get_position());
192                    }
193                }
194                '~' => {
195                    state.advance(1);
196                    state.add_token(CoqSyntaxKind::Tilde, start, state.get_position());
197                }
198                '!' => {
199                    state.advance(1);
200                    state.add_token(CoqSyntaxKind::Exclamation, start, state.get_position());
201                }
202                '?' => {
203                    state.advance(1);
204                    state.add_token(CoqSyntaxKind::Question, start, state.get_position());
205                }
206                '@' => {
207                    state.advance(1);
208                    state.add_token(CoqSyntaxKind::At, start, state.get_position());
209                }
210                '#' => {
211                    state.advance(1);
212                    state.add_token(CoqSyntaxKind::Hash, start, state.get_position());
213                }
214                '$' => {
215                    state.advance(1);
216                    state.add_token(CoqSyntaxKind::Dollar, start, state.get_position());
217                }
218                '%' => {
219                    state.advance(1);
220                    state.add_token(CoqSyntaxKind::Percent, start, state.get_position());
221                }
222                '^' => {
223                    state.advance(1);
224                    state.add_token(CoqSyntaxKind::Caret, start, state.get_position());
225                }
226                '&' => {
227                    state.advance(1);
228                    state.add_token(CoqSyntaxKind::Ampersand, start, state.get_position());
229                }
230                '"' => {
231                    state.advance(1);
232                    self.lex_string(state, start);
233                }
234                '0'..='9' => {
235                    state.advance(1);
236                    while let Some('0'..='9') = state.peek() {
237                        state.advance(1);
238                    }
239                    state.add_token(CoqSyntaxKind::NumberLiteral, start, state.get_position());
240                }
241                'a'..='z' | 'A'..='Z' | '_' => {
242                    state.advance(1);
243                    while let Some('a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '\'') = state.peek() {
244                        state.advance(1);
245                    }
246                    let end = state.get_position();
247                    let text = state.get_text_in((start..end).into());
248                    let kind = match text.as_ref() {
249                        "Theorem" => CoqSyntaxKind::Theorem,
250                        "Lemma" => CoqSyntaxKind::Lemma,
251                        "Remark" => CoqSyntaxKind::Remark,
252                        "Fact" => CoqSyntaxKind::Fact,
253                        "Corollary" => CoqSyntaxKind::Corollary,
254                        "Proposition" => CoqSyntaxKind::Proposition,
255                        "Definition" => CoqSyntaxKind::Definition,
256                        "Example" => CoqSyntaxKind::Example,
257                        "Fixpoint" => CoqSyntaxKind::Fixpoint,
258                        "CoFixpoint" => CoqSyntaxKind::CoFixpoint,
259                        "Inductive" => CoqSyntaxKind::Inductive,
260                        "CoInductive" => CoqSyntaxKind::CoInductive,
261                        "Record" => CoqSyntaxKind::Record,
262                        "Structure" => CoqSyntaxKind::Structure,
263                        "Variant" => CoqSyntaxKind::Variant,
264                        "Module" => CoqSyntaxKind::Module,
265                        "Section" => CoqSyntaxKind::Section,
266                        "End" => CoqSyntaxKind::End,
267                        "Require" => CoqSyntaxKind::Require,
268                        "Import" => CoqSyntaxKind::Import,
269                        "Export" => CoqSyntaxKind::Export,
270                        "Proof" => CoqSyntaxKind::Proof,
271                        "Qed" => CoqSyntaxKind::Qed,
272                        "Defined" => CoqSyntaxKind::Defined,
273                        "Admitted" => CoqSyntaxKind::Admitted,
274                        "Abort" => CoqSyntaxKind::Abort,
275                        "Match" => CoqSyntaxKind::Match,
276                        "With" => CoqSyntaxKind::With,
277                        "Forall" => CoqSyntaxKind::Forall,
278                        "Exists" => CoqSyntaxKind::Exists,
279                        "Fun" => CoqSyntaxKind::Fun,
280                        "Let" => CoqSyntaxKind::Let,
281                        "In" => CoqSyntaxKind::In,
282                        "If" => CoqSyntaxKind::If,
283                        "Then" => CoqSyntaxKind::Then,
284                        "Else" => CoqSyntaxKind::Else,
285                        "Type" => CoqSyntaxKind::Type,
286                        "Prop" => CoqSyntaxKind::Prop,
287                        "Set" => CoqSyntaxKind::Set,
288                        "Check" => CoqSyntaxKind::Check,
289                        "Print" => CoqSyntaxKind::Print,
290                        "Search" => CoqSyntaxKind::Search,
291                        "Locate" => CoqSyntaxKind::Locate,
292                        "About" => CoqSyntaxKind::About,
293                        _ => CoqSyntaxKind::Identifier,
294                    };
295                    state.add_token(kind, start, end);
296                }
297                _ => {
298                    state.advance(1);
299                    state.add_token(CoqSyntaxKind::Error, start, state.get_position());
300                }
301            }
302        }
303    }
304
305    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, start: usize) {
306        let mut depth = 1;
307        while let Some(c) = state.peek() {
308            match c {
309                '(' => {
310                    state.advance(1);
311                    if let Some('*') = state.peek() {
312                        state.advance(1);
313                        depth += 1;
314                    }
315                }
316                '*' => {
317                    state.advance(1);
318                    if let Some(')') = state.peek() {
319                        state.advance(1);
320                        depth -= 1;
321                        if depth == 0 {
322                            state.add_token(CoqSyntaxKind::Comment, start, state.get_position());
323                            return;
324                        }
325                    }
326                }
327                _ => {
328                    state.advance(1);
329                }
330            }
331        }
332        state.add_token(CoqSyntaxKind::Comment, start, state.get_position());
333    }
334
335    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, start: usize) {
336        while let Some(c) = state.peek() {
337            match c {
338                '"' => {
339                    state.advance(1);
340                    if let Some('"') = state.peek() {
341                        state.advance(1); // Escaped quote
342                    }
343                    else {
344                        state.add_token(CoqSyntaxKind::StringLiteral, start, state.get_position());
345                        return;
346                    }
347                }
348                _ => {
349                    state.advance(1);
350                }
351            }
352        }
353        state.add_token(CoqSyntaxKind::StringLiteral, start, state.get_position());
354    }
355}
356
357impl<'config> Lexer<CoqLanguage> for CoqLexer<'config> {
358    fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<CoqLanguage>) -> LexOutput<CoqLanguage> {
359        let mut state: State<'_, S> = LexerState::new(text);
360        // TODO: Implement incremental lexing using edits and cache
361        self.lex_internal(&mut state);
362        state.add_eof();
363        state.finish_with_cache(Ok(()), cache)
364    }
365}