1use crate::{kind::RSyntaxKind, language::RLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, Range, lexer::LexOutput, source::Source};
3
/// Shorthand for `oak_core`'s [`LexerState`] fixed to [`RLanguage`], generic over the
/// source type `S` and the source lifetime `'s`.
type State<'s, S> = LexerState<'s, S, RLanguage>;
5
/// Lexer for R source text.
///
/// Borrows an [`RLanguage`] configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct RLexer<'config> {
    /// Language configuration the lexer was created with. None of the lexing routines
    /// visible in this file read it, which is presumably why the name is underscore-prefixed.
    _config: &'config RLanguage,
}
10
11impl<'config> Lexer<RLanguage> for RLexer<'config> {
12 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<RLanguage>) -> LexOutput<RLanguage> {
13 let mut state = State::new(source);
14 let result = self.run(&mut state);
15 if result.is_ok() {
16 state.add_eof();
17 }
18 state.finish_with_cache(result, cache)
19 }
20}
21
22impl<'config> RLexer<'config> {
23 pub fn new(_config: &'config RLanguage) -> Self {
24 Self { _config }
25 }
26
27 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
28 while state.not_at_end() {
29 let safe_point = state.get_position();
30 if self.skip_whitespace(state) {
31 continue;
32 }
33
34 if self.lex_comment(state) {
35 continue;
36 }
37
38 if self.lex_string_literal(state) {
39 continue;
40 }
41
42 if self.lex_number_literal(state) {
43 continue;
44 }
45
46 if self.lex_identifier_or_keyword(state) {
47 continue;
48 }
49
50 if self.lex_operators(state) {
51 continue;
52 }
53
54 if self.lex_single_char_tokens(state) {
55 continue;
56 }
57
58 if self.lex_other(state) {
59 continue;
60 }
61
62 state.advance_if_dead_lock(safe_point);
63 }
64 Ok(())
65 }
66
67 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
69 if let Some(ch) = state.current() {
70 if ch.is_whitespace() {
71 state.advance(ch.len_utf8());
72 return true;
73 }
74 }
75 false
76 }
77
78 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
80 if let Some('#') = state.current() {
81 let start_pos = state.get_position();
82 state.advance(1); while let Some(ch) = state.current() {
86 if ch == '\n' || ch == '\r' {
87 break;
88 }
89 state.advance(ch.len_utf8());
90 }
91
92 state.add_token(RSyntaxKind::Comment, start_pos, state.get_position());
93 return true;
94 }
95 false
96 }
97
98 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
100 if let Some(quote) = state.current() {
101 if quote == '"' || quote == '\'' {
102 let start_pos = state.get_position();
103 state.advance(1); while let Some(ch) = state.current() {
106 if ch == quote {
107 state.advance(1); state.add_token(RSyntaxKind::StringLiteral, start_pos, state.get_position());
109 return true;
110 }
111 if ch == '\\' {
112 state.advance(1);
113 if let Some(escaped) = state.current() {
114 state.advance(escaped.len_utf8());
115 continue;
116 }
117 }
118 state.advance(ch.len_utf8());
119 }
120
121 state.add_token(RSyntaxKind::StringLiteral, start_pos, state.get_position());
123 return true;
124 }
125 }
126 false
127 }
128
129 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
131 if let Some(ch) = state.current() {
132 if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit())) {
133 let start_pos = state.get_position();
134 let mut has_dot = false;
135
136 while let Some(c) = state.current() {
137 if c.is_ascii_digit() {
138 state.advance(1);
139 }
140 else if c == '.' && !has_dot {
141 has_dot = true;
142 state.advance(1);
143 }
144 else if (c == 'e' || c == 'E') && !state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit() || c == '+' || c == '-') {
145 break;
146 }
147 else if c == 'e' || c == 'E' {
148 state.advance(1);
149 if let Some(next) = state.current() {
150 if next == '+' || next == '-' {
151 state.advance(1);
152 }
153 }
154 while let Some(digit) = state.current() {
155 if digit.is_ascii_digit() {
156 state.advance(1);
157 }
158 else {
159 break;
160 }
161 }
162 break;
163 }
164 else if c == 'L' {
165 state.advance(1);
166 state.add_token(RSyntaxKind::IntegerLiteral, start_pos, state.get_position());
167 return true;
168 }
169 else if c == 'i' {
170 state.advance(1);
171 state.add_token(RSyntaxKind::FloatLiteral, start_pos, state.get_position());
172 return true;
173 }
174 else {
175 break;
176 }
177 }
178
179 let kind = if has_dot { RSyntaxKind::FloatLiteral } else { RSyntaxKind::IntegerLiteral };
180 state.add_token(kind, start_pos, state.get_position());
181 return true;
182 }
183 }
184 false
185 }
186
187 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
189 if let Some(ch) = state.current() {
190 if ch.is_alphabetic() || ch == '.' || ch == '_' {
191 let start_pos = state.get_position();
192 state.advance(ch.len_utf8());
193
194 while let Some(c) = state.current() {
195 if c.is_alphanumeric() || c == '.' || c == '_' {
196 state.advance(c.len_utf8());
197 }
198 else {
199 break;
200 }
201 }
202
203 let text = state.get_text_in(Range { start: start_pos, end: state.get_position() });
204 let kind = match text.as_ref() {
205 "if" => RSyntaxKind::If,
206 "else" => RSyntaxKind::Else,
207 "for" => RSyntaxKind::For,
208 "in" => RSyntaxKind::In,
209 "while" => RSyntaxKind::While,
210 "repeat" => RSyntaxKind::Repeat,
211 "next" => RSyntaxKind::Next,
212 "break" => RSyntaxKind::Break,
213 "function" => RSyntaxKind::Function,
214 "TRUE" => RSyntaxKind::True,
215 "FALSE" => RSyntaxKind::False,
216 "NULL" => RSyntaxKind::Null,
217 "Inf" => RSyntaxKind::Inf,
218 "NaN" => RSyntaxKind::NaN,
219 "NA" => RSyntaxKind::NA,
220 "NA_integer_" => RSyntaxKind::NaInteger,
221 "NA_real_" => RSyntaxKind::NaReal,
222 "NA_complex_" => RSyntaxKind::NaComplex,
223 "NA_character_" => RSyntaxKind::NaCharacter,
224 _ => RSyntaxKind::Identifier,
225 };
226
227 state.add_token(kind, start_pos, state.get_position());
228 return true;
229 }
230 }
231 false
232 }
233
234 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
236 let start_pos = state.get_position();
237 if let Some(ch) = state.current() {
238 match ch {
239 '<' => {
240 state.advance(1);
241 if let Some('-') = state.current() {
242 state.advance(1);
243 state.add_token(RSyntaxKind::LeftArrow, start_pos, state.get_position());
244 return true;
245 }
246 if let Some('<') = state.current() {
247 state.advance(1);
248 if let Some('-') = state.current() {
249 state.advance(1);
250 state.add_token(RSyntaxKind::DoubleLeftArrow, start_pos, state.get_position());
251 return true;
252 }
253 }
254 if let Some('=') = state.current() {
255 state.advance(1);
256 state.add_token(RSyntaxKind::LessEqual, start_pos, state.get_position());
257 return true;
258 }
259 state.add_token(RSyntaxKind::Less, start_pos, state.get_position());
260 return true;
261 }
262 '-' => {
263 state.advance(1);
264 if let Some('>') = state.current() {
265 state.advance(1);
266 if let Some('>') = state.current() {
267 state.advance(1);
268 state.add_token(RSyntaxKind::DoubleRightArrow, start_pos, state.get_position());
269 return true;
270 }
271 state.add_token(RSyntaxKind::RightArrow, start_pos, state.get_position());
272 return true;
273 }
274 state.add_token(RSyntaxKind::Minus, start_pos, state.get_position());
275 return true;
276 }
277 '=' => {
278 state.advance(1);
279 if let Some('=') = state.current() {
280 state.advance(1);
281 state.add_token(RSyntaxKind::EqualEqual, start_pos, state.get_position());
282 return true;
283 }
284 state.add_token(RSyntaxKind::Equal, start_pos, state.get_position());
285 return true;
286 }
287 '!' => {
288 state.advance(1);
289 if let Some('=') = state.current() {
290 state.advance(1);
291 state.add_token(RSyntaxKind::NotEqual, start_pos, state.get_position());
292 return true;
293 }
294 state.add_token(RSyntaxKind::Not, start_pos, state.get_position());
295 return true;
296 }
297 '>' => {
298 state.advance(1);
299 if let Some('=') = state.current() {
300 state.advance(1);
301 state.add_token(RSyntaxKind::GreaterEqual, start_pos, state.get_position());
302 return true;
303 }
304 state.add_token(RSyntaxKind::Greater, start_pos, state.get_position());
305 return true;
306 }
307 '&' => {
308 state.advance(1);
309 if let Some('&') = state.current() {
310 state.advance(1);
311 state.add_token(RSyntaxKind::AndAnd, start_pos, state.get_position());
312 return true;
313 }
314 state.add_token(RSyntaxKind::And, start_pos, state.get_position());
315 return true;
316 }
317 '|' => {
318 state.advance(1);
319 if let Some('|') = state.current() {
320 state.advance(1);
321 state.add_token(RSyntaxKind::OrOr, start_pos, state.get_position());
322 return true;
323 }
324 if let Some('>') = state.current() {
325 state.advance(1);
326 state.add_token(RSyntaxKind::Pipe, start_pos, state.get_position());
327 return true;
328 }
329 state.add_token(RSyntaxKind::Or, start_pos, state.get_position());
330 return true;
331 }
332 '%' => {
333 state.advance(1);
334 while let Some(c) = state.current() {
335 state.advance(c.len_utf8());
336 if c == '%' {
337 state.add_token(RSyntaxKind::Operator, start_pos, state.get_position());
338 return true;
339 }
340 }
341 state.add_token(RSyntaxKind::Operator, start_pos, state.get_position());
343 return true;
344 }
345 _ => {}
346 }
347 }
348 false
349 }
350
351 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
353 if let Some(ch) = state.current() {
354 let start_pos = state.get_position();
355 let kind = match ch {
356 '(' => Some(RSyntaxKind::LeftParen),
357 ')' => Some(RSyntaxKind::RightParen),
358 '[' => Some(RSyntaxKind::LeftBracket),
359 ']' => Some(RSyntaxKind::RightBracket),
360 '{' => Some(RSyntaxKind::LeftBrace),
361 '}' => Some(RSyntaxKind::RightBrace),
362 ',' => Some(RSyntaxKind::Comma),
363 ';' => Some(RSyntaxKind::Semicolon),
364 '+' => Some(RSyntaxKind::Plus),
365 '*' => Some(RSyntaxKind::Star),
366 '/' => Some(RSyntaxKind::Slash),
367 '^' => Some(RSyntaxKind::Caret),
368 '$' => Some(RSyntaxKind::Dollar),
369 '@' => Some(RSyntaxKind::At),
370 '~' => Some(RSyntaxKind::Tilde),
371 ':' => {
372 state.advance(1);
373 if let Some(':') = state.current() {
374 state.advance(1);
375 if let Some(':') = state.current() {
376 state.advance(1);
377 Some(RSyntaxKind::TripleColon)
378 }
379 else {
380 Some(RSyntaxKind::DoubleColon)
381 }
382 }
383 else {
384 return {
385 state.add_token(RSyntaxKind::Colon, start_pos, state.get_position());
386 true
387 };
388 }
389 }
390 '?' => Some(RSyntaxKind::Question),
391 _ => None,
392 };
393
394 if let Some(k) = kind {
395 if !matches!(k, RSyntaxKind::TripleColon | RSyntaxKind::DoubleColon) {
396 state.advance(1);
397 }
398 state.add_token(k, start_pos, state.get_position());
399 return true;
400 }
401 }
402 false
403 }
404
405 fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
407 if let Some(ch) = state.current() {
408 let start_pos = state.get_position();
409 let len = ch.len_utf8();
410 state.advance(len);
411 state.add_token(RSyntaxKind::Error, start_pos, state.get_position());
412 return true;
413 }
414 false
415 }
416}