1use crate::{kind::FortranSyntaxKind, language::FortranLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, lexer::LexOutput, source::Source};
3
4type State<'a, S> = LexerState<'a, S, FortranLanguage>;
5
6#[derive(Clone)]
7pub struct FortranLexer<'config> {
8 _config: &'config FortranLanguage,
9}
10
11impl<'config> Lexer<FortranLanguage> for FortranLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<FortranLanguage>) -> LexOutput<FortranLanguage> {
13 let mut state = LexerState::new(source);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> FortranLexer<'config> {
23 pub fn new(config: &'config FortranLanguage) -> Self {
24 Self { _config: config }
25 }
26
27 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
28 while state.not_at_end() {
29 let safe_point = state.get_position();
30
31 if self.lex_newline(state) {
32 continue;
33 }
34
35 if self.skip_whitespace(state) {
36 continue;
37 }
38
39 if self.skip_comment(state) {
40 continue;
41 }
42
43 if self.lex_string_literal(state) {
44 continue;
45 }
46
47 if self.lex_char_literal(state) {
48 continue;
49 }
50
51 if self.lex_number_literal(state) {
52 continue;
53 }
54
55 if self.lex_identifier_or_keyword(state) {
56 continue;
57 }
58
59 if self.lex_operator_or_single_char(state) {
60 continue;
61 }
62
63 if let Some(c) = state.current() {
65 state.advance(c.len_utf8());
66 }
67
68 state.advance_if_dead_lock(safe_point);
69 }
70
71 Ok(())
72 }
73
74 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
75 let start = state.get_position();
76 if let Some(ch) = state.current() {
77 if ch == '\n' {
78 state.advance(1);
79 state.add_token(FortranSyntaxKind::Newline, start, state.get_position());
80 return true;
81 }
82 if ch == '\r' {
83 state.advance(1);
84 if state.current() == Some('\n') {
85 state.advance(1);
86 }
87 state.add_token(FortranSyntaxKind::Newline, start, state.get_position());
88 return true;
89 }
90 }
91 false
92 }
93
94 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
95 let mut advanced = false;
96
97 while let Some(ch) = state.current() {
98 if ch == ' ' || ch == '\t' {
99 state.advance(ch.len_utf8());
100 advanced = true;
101 }
102 else {
103 break;
104 }
105 }
106
107 advanced
108 }
109
110 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
111 if let Some(ch) = state.current() {
113 if ch == '!' {
114 while let Some(c) = state.current() {
116 if c == '\n' || c == '\r' {
117 break;
118 }
119 state.advance(c.len_utf8());
120 }
121 return true;
122 }
123 }
124 false
125 }
126
127 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
128 let start = state.get_position();
129
130 if state.current() != Some('"') {
131 return false;
132 }
133
134 state.advance(1); while let Some(ch) = state.current() {
137 if ch == '"' {
138 state.advance(1); break;
140 }
141 if ch == '\n' || ch == '\r' {
142 break; }
144 state.advance(ch.len_utf8());
145 }
146
147 state.add_token(FortranSyntaxKind::StringLiteral, start, state.get_position());
148 true
149 }
150
151 fn lex_char_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
152 let start = state.get_position();
153
154 if state.current() != Some('\'') {
155 return false;
156 }
157
158 state.advance(1); if let Some(ch) = state.current() {
162 if ch != '\'' && ch != '\n' && ch != '\r' {
163 state.advance(ch.len_utf8());
164 }
165 }
166
167 if state.current() == Some('\'') {
169 state.advance(1);
170 }
171
172 state.add_token(FortranSyntaxKind::CharLiteral, start, state.get_position());
173 true
174 }
175
176 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
177 let start = state.get_position();
178 let first = match state.current() {
179 Some(c) => c,
180 None => return false,
181 };
182
183 if !first.is_ascii_digit() {
184 return false;
185 }
186
187 state.advance(1);
189 while let Some(c) = state.current() {
190 if c.is_ascii_digit() || c == '_' {
191 state.advance(1);
192 }
193 else {
194 break;
195 }
196 }
197
198 if state.current() == Some('.') {
200 let n1 = state.peek_next_n(1);
201 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
202 state.advance(1); while let Some(c) = state.current() {
204 if c.is_ascii_digit() || c == '_' {
205 state.advance(1);
206 }
207 else {
208 break;
209 }
210 }
211 }
212 }
213
214 if let Some(c) = state.current() {
216 if c == 'e' || c == 'E' || c == 'd' || c == 'D' {
217 let n1 = state.peek_next_n(1);
218 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
219 state.advance(1);
220 if let Some(sign) = state.current() {
221 if sign == '+' || sign == '-' {
222 state.advance(1);
223 }
224 }
225 while let Some(d) = state.current() {
226 if d.is_ascii_digit() || d == '_' {
227 state.advance(1);
228 }
229 else {
230 break;
231 }
232 }
233 }
234 }
235 }
236
237 let end = state.get_position();
238 state.add_token(FortranSyntaxKind::NumberLiteral, start, end);
239 true
240 }
241
242 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
243 let start = state.get_position();
244 let first = match state.current() {
245 Some(c) => c,
246 None => return false,
247 };
248
249 if !first.is_ascii_alphabetic() && first != '_' {
250 return false;
251 }
252
253 state.advance(1);
254 while let Some(c) = state.current() {
255 if c.is_ascii_alphanumeric() || c == '_' {
256 state.advance(1);
257 }
258 else {
259 break;
260 }
261 }
262
263 let end = state.get_position();
264 let text = state.get_text_in((start..end).into());
265
266 let kind = match text.to_lowercase().as_str() {
267 "program" => FortranSyntaxKind::Program,
268 "end" => FortranSyntaxKind::End,
269 "subroutine" => FortranSyntaxKind::Subroutine,
270 "function" => FortranSyntaxKind::Function,
271 "integer" => FortranSyntaxKind::Integer,
272 "real" => FortranSyntaxKind::Real,
273 "double" => FortranSyntaxKind::Double,
274 "precision" => FortranSyntaxKind::Precision,
275 "character" => FortranSyntaxKind::Character,
276 "logical" => FortranSyntaxKind::Logical,
277 "complex" => FortranSyntaxKind::Complex,
278 "if" => FortranSyntaxKind::If,
279 "then" => FortranSyntaxKind::Then,
280 "else" => FortranSyntaxKind::Else,
281 "elseif" => FortranSyntaxKind::ElseIf,
282 "endif" => FortranSyntaxKind::EndIf,
283 "do" => FortranSyntaxKind::Do,
284 "enddo" => FortranSyntaxKind::EndDo,
285 "while" => FortranSyntaxKind::While,
286 "call" => FortranSyntaxKind::Call,
287 "return" => FortranSyntaxKind::Return,
288 "stop" => FortranSyntaxKind::Stop,
289 "continue" => FortranSyntaxKind::Continue,
290 "goto" => FortranSyntaxKind::Goto,
291 "implicit" => FortranSyntaxKind::Implicit,
292 "none" => FortranSyntaxKind::None,
293 "parameter" => FortranSyntaxKind::Parameter,
294 "dimension" => FortranSyntaxKind::Dimension,
295 "common" => FortranSyntaxKind::Common,
296 "equivalence" => FortranSyntaxKind::Equivalence,
297 "external" => FortranSyntaxKind::External,
298 "intrinsic" => FortranSyntaxKind::Intrinsic,
299 "save" => FortranSyntaxKind::Save,
300 "data" => FortranSyntaxKind::Data,
301 "format" => FortranSyntaxKind::Format,
302 "read" => FortranSyntaxKind::Read,
303 "write" => FortranSyntaxKind::Write,
304 "print" => FortranSyntaxKind::Print,
305 "open" => FortranSyntaxKind::Open,
306 "close" => FortranSyntaxKind::Close,
307 "inquire" => FortranSyntaxKind::Inquire,
308 "rewind" => FortranSyntaxKind::Rewind,
309 "backspace" => FortranSyntaxKind::Backspace,
310 "endfile" => FortranSyntaxKind::EndFile,
311 "true" => FortranSyntaxKind::True,
312 "false" => FortranSyntaxKind::False,
313 "and" => FortranSyntaxKind::And,
314 "or" => FortranSyntaxKind::Or,
315 "not" => FortranSyntaxKind::Not,
316 "eq" => FortranSyntaxKind::Eq,
317 "ne" => FortranSyntaxKind::Ne,
318 "lt" => FortranSyntaxKind::Lt,
319 "le" => FortranSyntaxKind::Le,
320 "gt" => FortranSyntaxKind::Gt,
321 "ge" => FortranSyntaxKind::Ge,
322 _ => FortranSyntaxKind::Identifier,
323 };
324
325 state.add_token(kind, start, end);
326 true
327 }
328
329 fn lex_operator_or_single_char<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
330 let start = state.get_position();
331 let c = match state.current() {
332 Some(c) => c,
333 None => return false,
334 };
335
336 match c {
337 '\n' => {
338 state.advance(1);
339 state.add_token(FortranSyntaxKind::Newline, start, state.get_position());
340 }
341 '(' => {
342 state.advance(1);
343 state.add_token(FortranSyntaxKind::LeftParen, start, state.get_position());
344 }
345 ')' => {
346 state.advance(1);
347 state.add_token(FortranSyntaxKind::RightParen, start, state.get_position());
348 }
349 ',' => {
350 state.advance(1);
351 state.add_token(FortranSyntaxKind::Comma, start, state.get_position());
352 }
353 '=' => {
354 state.advance(1);
355 if state.current() == Some('=') {
356 state.advance(1);
357 state.add_token(FortranSyntaxKind::EqualEqual, start, state.get_position());
358 }
359 else {
360 state.add_token(FortranSyntaxKind::Equal, start, state.get_position());
361 }
362 }
363 '+' => {
364 state.advance(1);
365 state.add_token(FortranSyntaxKind::Plus, start, state.get_position());
366 }
367 '-' => {
368 state.advance(1);
369 state.add_token(FortranSyntaxKind::Minus, start, state.get_position());
370 }
371 '*' => {
372 state.advance(1);
373 if state.current() == Some('*') {
374 state.advance(1);
375 state.add_token(FortranSyntaxKind::StarStar, start, state.get_position());
376 }
377 else {
378 state.add_token(FortranSyntaxKind::Star, start, state.get_position());
379 }
380 }
381 '/' => {
382 state.advance(1);
383 if state.current() == Some('=') {
384 state.advance(1);
385 state.add_token(FortranSyntaxKind::SlashEqual, start, state.get_position());
386 }
387 else {
388 state.add_token(FortranSyntaxKind::Slash, start, state.get_position());
389 }
390 }
391 '<' => {
392 state.advance(1);
393 if state.current() == Some('=') {
394 state.advance(1);
395 state.add_token(FortranSyntaxKind::LessEqual, start, state.get_position());
396 }
397 else {
398 state.add_token(FortranSyntaxKind::Less, start, state.get_position());
399 }
400 }
401 '>' => {
402 state.advance(1);
403 if state.current() == Some('=') {
404 state.advance(1);
405 state.add_token(FortranSyntaxKind::GreaterEqual, start, state.get_position());
406 }
407 else {
408 state.add_token(FortranSyntaxKind::Greater, start, state.get_position());
409 }
410 }
411 '.' => {
412 state.advance(1);
413 state.add_token(FortranSyntaxKind::Dot, start, state.get_position());
414 }
415 ':' => {
416 state.advance(1);
417 if state.current() == Some(':') {
418 state.advance(1);
419 state.add_token(FortranSyntaxKind::ColonColon, start, state.get_position());
420 }
421 else {
422 state.add_token(FortranSyntaxKind::Colon, start, state.get_position());
423 }
424 }
425 ';' => {
426 state.advance(1);
427 state.add_token(FortranSyntaxKind::Semicolon, start, state.get_position());
428 }
429 '&' => {
430 state.advance(1);
431 state.add_token(FortranSyntaxKind::Ampersand, start, state.get_position());
432 }
433 '%' => {
434 state.advance(1);
435 state.add_token(FortranSyntaxKind::Percent, start, state.get_position());
436 }
437 _ => {
438 return false;
439 }
440 }
441 true
442 }
443}