// oak_fortran/lexer/mod.rs

1use crate::{kind::FortranSyntaxKind, language::FortranLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, FortranLanguage>;
5
6#[derive(Clone)]
7pub struct FortranLexer<'config> {
8    _config: &'config FortranLanguage,
9}
10
11impl<'config> Lexer<FortranLanguage> for FortranLexer<'config> {
12    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<FortranLanguage>) -> LexOutput<FortranLanguage> {
13        let mut state = LexerState::new(source);
14        let result = self.run(&mut state);
15        if result.is_ok() {
16            state.add_eof();
17        }
18        state.finish_with_cache(result, cache)
19    }
20}
21
22impl<'config> FortranLexer<'config> {
23    pub fn new(config: &'config FortranLanguage) -> Self {
24        Self { _config: config }
25    }
26
27    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
28        while state.not_at_end() {
29            let safe_point = state.get_position();
30
31            if self.lex_newline(state) {
32                continue;
33            }
34
35            if self.skip_whitespace(state) {
36                continue;
37            }
38
39            if self.skip_comment(state) {
40                continue;
41            }
42
43            if self.lex_string_literal(state) {
44                continue;
45            }
46
47            if self.lex_char_literal(state) {
48                continue;
49            }
50
51            if self.lex_number_literal(state) {
52                continue;
53            }
54
55            if self.lex_identifier_or_keyword(state) {
56                continue;
57            }
58
59            if self.lex_operator_or_single_char(state) {
60                continue;
61            }
62
63            // If no lexer matched, advance by one character to avoid infinite loop
64            if let Some(c) = state.current() {
65                state.advance(c.len_utf8());
66            }
67
68            state.advance_if_dead_lock(safe_point);
69        }
70
71        Ok(())
72    }
73
74    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
75        let start = state.get_position();
76        if let Some(ch) = state.current() {
77            if ch == '\n' {
78                state.advance(1);
79                state.add_token(FortranSyntaxKind::Newline, start, state.get_position());
80                return true;
81            }
82            if ch == '\r' {
83                state.advance(1);
84                if state.current() == Some('\n') {
85                    state.advance(1);
86                }
87                state.add_token(FortranSyntaxKind::Newline, start, state.get_position());
88                return true;
89            }
90        }
91        false
92    }
93
94    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
95        let mut advanced = false;
96
97        while let Some(ch) = state.current() {
98            if ch == ' ' || ch == '\t' {
99                state.advance(ch.len_utf8());
100                advanced = true;
101            }
102            else {
103                break;
104            }
105        }
106
107        advanced
108    }
109
110    fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
111        // Check for Fortran comment indicators
112        if let Some(ch) = state.current() {
113            if ch == '!' {
114                // Skip to end of line
115                while let Some(c) = state.current() {
116                    if c == '\n' || c == '\r' {
117                        break;
118                    }
119                    state.advance(c.len_utf8());
120                }
121                return true;
122            }
123        }
124        false
125    }
126
127    fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
128        let start = state.get_position();
129
130        if state.current() != Some('"') {
131            return false;
132        }
133
134        state.advance(1); // consume opening quote
135
136        while let Some(ch) = state.current() {
137            if ch == '"' {
138                state.advance(1); // consume closing quote
139                break;
140            }
141            if ch == '\n' || ch == '\r' {
142                break; // Fortran strings don't span lines
143            }
144            state.advance(ch.len_utf8());
145        }
146
147        state.add_token(FortranSyntaxKind::StringLiteral, start, state.get_position());
148        true
149    }
150
151    fn lex_char_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
152        let start = state.get_position();
153
154        if state.current() != Some('\'') {
155            return false;
156        }
157
158        state.advance(1); // consume opening quote
159
160        // Consume exactly one character (or none for empty char literal)
161        if let Some(ch) = state.current() {
162            if ch != '\'' && ch != '\n' && ch != '\r' {
163                state.advance(ch.len_utf8());
164            }
165        }
166
167        // Consume closing quote if present
168        if state.current() == Some('\'') {
169            state.advance(1);
170        }
171
172        state.add_token(FortranSyntaxKind::CharLiteral, start, state.get_position());
173        true
174    }
175
176    fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
177        let start = state.get_position();
178        let first = match state.current() {
179            Some(c) => c,
180            None => return false,
181        };
182
183        if !first.is_ascii_digit() {
184            return false;
185        }
186
187        // Read integer part
188        state.advance(1);
189        while let Some(c) = state.current() {
190            if c.is_ascii_digit() || c == '_' {
191                state.advance(1);
192            }
193            else {
194                break;
195            }
196        }
197
198        // Check for decimal point
199        if state.current() == Some('.') {
200            let n1 = state.peek_next_n(1);
201            if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
202                state.advance(1); // consume '.'
203                while let Some(c) = state.current() {
204                    if c.is_ascii_digit() || c == '_' {
205                        state.advance(1);
206                    }
207                    else {
208                        break;
209                    }
210                }
211            }
212        }
213
214        // Check for exponent (e, E, d, D for Fortran)
215        if let Some(c) = state.current() {
216            if c == 'e' || c == 'E' || c == 'd' || c == 'D' {
217                let n1 = state.peek_next_n(1);
218                if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
219                    state.advance(1);
220                    if let Some(sign) = state.current() {
221                        if sign == '+' || sign == '-' {
222                            state.advance(1);
223                        }
224                    }
225                    while let Some(d) = state.current() {
226                        if d.is_ascii_digit() || d == '_' {
227                            state.advance(1);
228                        }
229                        else {
230                            break;
231                        }
232                    }
233                }
234            }
235        }
236
237        let end = state.get_position();
238        state.add_token(FortranSyntaxKind::NumberLiteral, start, end);
239        true
240    }
241
242    fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
243        let start = state.get_position();
244        let first = match state.current() {
245            Some(c) => c,
246            None => return false,
247        };
248
249        if !first.is_ascii_alphabetic() && first != '_' {
250            return false;
251        }
252
253        state.advance(1);
254        while let Some(c) = state.current() {
255            if c.is_ascii_alphanumeric() || c == '_' {
256                state.advance(1);
257            }
258            else {
259                break;
260            }
261        }
262
263        let end = state.get_position();
264        let text = state.get_text_in((start..end).into());
265
266        let kind = match text.to_lowercase().as_str() {
267            "program" => FortranSyntaxKind::Program,
268            "end" => FortranSyntaxKind::End,
269            "subroutine" => FortranSyntaxKind::Subroutine,
270            "function" => FortranSyntaxKind::Function,
271            "integer" => FortranSyntaxKind::Integer,
272            "real" => FortranSyntaxKind::Real,
273            "double" => FortranSyntaxKind::Double,
274            "precision" => FortranSyntaxKind::Precision,
275            "character" => FortranSyntaxKind::Character,
276            "logical" => FortranSyntaxKind::Logical,
277            "complex" => FortranSyntaxKind::Complex,
278            "if" => FortranSyntaxKind::If,
279            "then" => FortranSyntaxKind::Then,
280            "else" => FortranSyntaxKind::Else,
281            "elseif" => FortranSyntaxKind::ElseIf,
282            "endif" => FortranSyntaxKind::EndIf,
283            "do" => FortranSyntaxKind::Do,
284            "enddo" => FortranSyntaxKind::EndDo,
285            "while" => FortranSyntaxKind::While,
286            "call" => FortranSyntaxKind::Call,
287            "return" => FortranSyntaxKind::Return,
288            "stop" => FortranSyntaxKind::Stop,
289            "continue" => FortranSyntaxKind::Continue,
290            "goto" => FortranSyntaxKind::Goto,
291            "implicit" => FortranSyntaxKind::Implicit,
292            "none" => FortranSyntaxKind::None,
293            "parameter" => FortranSyntaxKind::Parameter,
294            "dimension" => FortranSyntaxKind::Dimension,
295            "common" => FortranSyntaxKind::Common,
296            "equivalence" => FortranSyntaxKind::Equivalence,
297            "external" => FortranSyntaxKind::External,
298            "intrinsic" => FortranSyntaxKind::Intrinsic,
299            "save" => FortranSyntaxKind::Save,
300            "data" => FortranSyntaxKind::Data,
301            "format" => FortranSyntaxKind::Format,
302            "read" => FortranSyntaxKind::Read,
303            "write" => FortranSyntaxKind::Write,
304            "print" => FortranSyntaxKind::Print,
305            "open" => FortranSyntaxKind::Open,
306            "close" => FortranSyntaxKind::Close,
307            "inquire" => FortranSyntaxKind::Inquire,
308            "rewind" => FortranSyntaxKind::Rewind,
309            "backspace" => FortranSyntaxKind::Backspace,
310            "endfile" => FortranSyntaxKind::EndFile,
311            "true" => FortranSyntaxKind::True,
312            "false" => FortranSyntaxKind::False,
313            "and" => FortranSyntaxKind::And,
314            "or" => FortranSyntaxKind::Or,
315            "not" => FortranSyntaxKind::Not,
316            "eq" => FortranSyntaxKind::Eq,
317            "ne" => FortranSyntaxKind::Ne,
318            "lt" => FortranSyntaxKind::Lt,
319            "le" => FortranSyntaxKind::Le,
320            "gt" => FortranSyntaxKind::Gt,
321            "ge" => FortranSyntaxKind::Ge,
322            _ => FortranSyntaxKind::Identifier,
323        };
324
325        state.add_token(kind, start, end);
326        true
327    }
328
329    fn lex_operator_or_single_char<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
330        let start = state.get_position();
331        let c = match state.current() {
332            Some(c) => c,
333            None => return false,
334        };
335
336        match c {
337            '\n' => {
338                state.advance(1);
339                state.add_token(FortranSyntaxKind::Newline, start, state.get_position());
340            }
341            '(' => {
342                state.advance(1);
343                state.add_token(FortranSyntaxKind::LeftParen, start, state.get_position());
344            }
345            ')' => {
346                state.advance(1);
347                state.add_token(FortranSyntaxKind::RightParen, start, state.get_position());
348            }
349            ',' => {
350                state.advance(1);
351                state.add_token(FortranSyntaxKind::Comma, start, state.get_position());
352            }
353            '=' => {
354                state.advance(1);
355                if state.current() == Some('=') {
356                    state.advance(1);
357                    state.add_token(FortranSyntaxKind::EqualEqual, start, state.get_position());
358                }
359                else {
360                    state.add_token(FortranSyntaxKind::Equal, start, state.get_position());
361                }
362            }
363            '+' => {
364                state.advance(1);
365                state.add_token(FortranSyntaxKind::Plus, start, state.get_position());
366            }
367            '-' => {
368                state.advance(1);
369                state.add_token(FortranSyntaxKind::Minus, start, state.get_position());
370            }
371            '*' => {
372                state.advance(1);
373                if state.current() == Some('*') {
374                    state.advance(1);
375                    state.add_token(FortranSyntaxKind::StarStar, start, state.get_position());
376                }
377                else {
378                    state.add_token(FortranSyntaxKind::Star, start, state.get_position());
379                }
380            }
381            '/' => {
382                state.advance(1);
383                if state.current() == Some('=') {
384                    state.advance(1);
385                    state.add_token(FortranSyntaxKind::SlashEqual, start, state.get_position());
386                }
387                else {
388                    state.add_token(FortranSyntaxKind::Slash, start, state.get_position());
389                }
390            }
391            '<' => {
392                state.advance(1);
393                if state.current() == Some('=') {
394                    state.advance(1);
395                    state.add_token(FortranSyntaxKind::LessEqual, start, state.get_position());
396                }
397                else {
398                    state.add_token(FortranSyntaxKind::Less, start, state.get_position());
399                }
400            }
401            '>' => {
402                state.advance(1);
403                if state.current() == Some('=') {
404                    state.advance(1);
405                    state.add_token(FortranSyntaxKind::GreaterEqual, start, state.get_position());
406                }
407                else {
408                    state.add_token(FortranSyntaxKind::Greater, start, state.get_position());
409                }
410            }
411            '.' => {
412                state.advance(1);
413                state.add_token(FortranSyntaxKind::Dot, start, state.get_position());
414            }
415            ':' => {
416                state.advance(1);
417                if state.current() == Some(':') {
418                    state.advance(1);
419                    state.add_token(FortranSyntaxKind::ColonColon, start, state.get_position());
420                }
421                else {
422                    state.add_token(FortranSyntaxKind::Colon, start, state.get_position());
423                }
424            }
425            ';' => {
426                state.advance(1);
427                state.add_token(FortranSyntaxKind::Semicolon, start, state.get_position());
428            }
429            '&' => {
430                state.advance(1);
431                state.add_token(FortranSyntaxKind::Ampersand, start, state.get_position());
432            }
433            '%' => {
434                state.advance(1);
435                state.add_token(FortranSyntaxKind::Percent, start, state.get_position());
436            }
437            _ => {
438                return false;
439            }
440        }
441        true
442    }
443}