1#![doc = include_str!("readme.md")]
2pub mod token_type;
3pub use token_type::RTokenType;
4
5use crate::language::RLanguage;
6use oak_core::{Lexer, LexerCache, LexerState, Range, lexer::LexOutput, source::Source};
7
8type State<'s, S> = LexerState<'s, S, RLanguage>;
9
10#[derive(Clone)]
11pub struct RLexer<'config> {
12 _config: &'config RLanguage,
13}
14
15impl<'config> Lexer<RLanguage> for RLexer<'config> {
16 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<RLanguage>) -> LexOutput<RLanguage> {
17 let mut state = State::new(source);
18 let result = self.run(&mut state);
19 if result.is_ok() {
20 state.add_eof()
21 }
22 state.finish_with_cache(result, cache)
23 }
24}
25
26impl<'config> RLexer<'config> {
27 pub fn new(_config: &'config RLanguage) -> Self {
28 Self { _config }
29 }
30
31 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
32 while state.not_at_end() {
33 let safe_point = state.get_position();
34 if self.skip_whitespace(state) {
35 continue;
36 }
37
38 if self.lex_comment(state) {
39 continue;
40 }
41
42 if self.lex_string_literal(state) {
43 continue;
44 }
45
46 if self.lex_number_literal(state) {
47 continue;
48 }
49
50 if self.lex_identifier_or_keyword(state) {
51 continue;
52 }
53
54 if self.lex_operators(state) {
55 continue;
56 }
57
58 if self.lex_single_char_tokens(state) {
59 continue;
60 }
61
62 if self.lex_other(state) {
63 continue;
64 }
65
66 state.advance_if_dead_lock(safe_point)
67 }
68 Ok(())
69 }
70
71 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
73 if let Some(ch) = state.current() {
74 if ch.is_whitespace() {
75 state.advance(ch.len_utf8());
76 return true;
77 }
78 }
79 false
80 }
81
82 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
84 if let Some('#') = state.current() {
85 let start_pos = state.get_position();
86 state.advance(1); while let Some(ch) = state.current() {
90 if ch == '\n' || ch == '\r' {
91 break;
92 }
93 state.advance(ch.len_utf8())
94 }
95
96 state.add_token(RTokenType::Comment, start_pos, state.get_position());
97 return true;
98 }
99 false
100 }
101
102 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
104 if let Some(quote) = state.current() {
105 if quote == '"' || quote == '\'' {
106 let start_pos = state.get_position();
107 state.advance(1); while let Some(ch) = state.current() {
110 if ch == quote {
111 state.advance(1); state.add_token(RTokenType::StringLiteral, start_pos, state.get_position());
113 return true;
114 }
115 if ch == '\\' {
116 state.advance(1);
117 if let Some(escaped) = state.current() {
118 state.advance(escaped.len_utf8());
119 continue;
120 }
121 }
122 state.advance(ch.len_utf8())
123 }
124
125 state.add_token(RTokenType::StringLiteral, start_pos, state.get_position());
127 return true;
128 }
129 }
130 false
131 }
132
133 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
135 if let Some(ch) = state.current() {
136 if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit())) {
137 let start_pos = state.get_position();
138 let mut has_dot = false;
139
140 while let Some(c) = state.current() {
141 if c.is_ascii_digit() {
142 state.advance(1)
143 }
144 else if c == '.' && !has_dot {
145 has_dot = true;
146 state.advance(1)
147 }
148 else if (c == 'e' || c == 'E') && !state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit() || c == '+' || c == '-') {
149 break;
150 }
151 else if c == 'e' || c == 'E' {
152 state.advance(1);
153 if let Some(next) = state.current() {
154 if next == '+' || next == '-' {
155 state.advance(1)
156 }
157 }
158 while let Some(digit) = state.current() {
159 if digit.is_ascii_digit() { state.advance(1) } else { break }
160 }
161 break;
162 }
163 else if c == 'L' {
164 state.advance(1);
165 state.add_token(RTokenType::IntegerLiteral, start_pos, state.get_position());
166 return true;
167 }
168 else if c == 'i' {
169 state.advance(1);
170 state.add_token(RTokenType::FloatLiteral, start_pos, state.get_position());
171 return true;
172 }
173 else {
174 break;
175 }
176 }
177
178 let kind = if has_dot { RTokenType::FloatLiteral } else { RTokenType::IntegerLiteral };
179 state.add_token(kind, start_pos, state.get_position());
180 return true;
181 }
182 }
183 false
184 }
185
186 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
188 if let Some(ch) = state.current() {
189 if ch.is_alphabetic() || ch == '.' || ch == '_' {
190 let start_pos = state.get_position();
191 state.advance(ch.len_utf8());
192
193 while let Some(c) = state.current() {
194 if c.is_alphanumeric() || c == '.' || c == '_' { state.advance(c.len_utf8()) } else { break }
195 }
196
197 let text = state.get_text_in(Range { start: start_pos, end: state.get_position() });
198 let kind = match text.as_ref() {
199 "if" => RTokenType::If,
200 "else" => RTokenType::Else,
201 "for" => RTokenType::For,
202 "in" => RTokenType::In,
203 "while" => RTokenType::While,
204 "repeat" => RTokenType::Repeat,
205 "next" => RTokenType::Next,
206 "break" => RTokenType::Break,
207 "function" => RTokenType::Function,
208 "TRUE" => RTokenType::True,
209 "FALSE" => RTokenType::False,
210 "NULL" => RTokenType::Null,
211 "Inf" => RTokenType::Inf,
212 "NaN" => RTokenType::NaN,
213 "NA" => RTokenType::NA,
214 "NA_integer_" => RTokenType::NaInteger,
215 "NA_real_" => RTokenType::NaReal,
216 "NA_complex_" => RTokenType::NaComplex,
217 "NA_character_" => RTokenType::NaCharacter,
218 _ => RTokenType::Identifier,
219 };
220
221 state.add_token(kind, start_pos, state.get_position());
222 return true;
223 }
224 }
225 false
226 }
227
228 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
230 let start_pos = state.get_position();
231 if let Some(ch) = state.current() {
232 match ch {
233 '<' => {
234 state.advance(1);
235 if let Some('-') = state.current() {
236 state.advance(1);
237 state.add_token(RTokenType::LeftArrow, start_pos, state.get_position());
238 return true;
239 }
240 if let Some('<') = state.current() {
241 state.advance(1);
242 if let Some('-') = state.current() {
243 state.advance(1);
244 state.add_token(RTokenType::DoubleLeftArrow, start_pos, state.get_position());
245 return true;
246 }
247 }
248 if let Some('=') = state.current() {
249 state.advance(1);
250 state.add_token(RTokenType::LessEqual, start_pos, state.get_position());
251 return true;
252 }
253 state.add_token(RTokenType::Less, start_pos, state.get_position());
254 return true;
255 }
256 '-' => {
257 state.advance(1);
258 if let Some('>') = state.current() {
259 state.advance(1);
260 if let Some('>') = state.current() {
261 state.advance(1);
262 state.add_token(RTokenType::DoubleRightArrow, start_pos, state.get_position());
263 return true;
264 }
265 state.add_token(RTokenType::RightArrow, start_pos, state.get_position());
266 return true;
267 }
268 state.add_token(RTokenType::Minus, start_pos, state.get_position());
269 return true;
270 }
271 '=' => {
272 state.advance(1);
273 if let Some('=') = state.current() {
274 state.advance(1);
275 state.add_token(RTokenType::EqualEqual, start_pos, state.get_position());
276 return true;
277 }
278 state.add_token(RTokenType::Equal, start_pos, state.get_position());
279 return true;
280 }
281 '!' => {
282 state.advance(1);
283 if let Some('=') = state.current() {
284 state.advance(1);
285 state.add_token(RTokenType::NotEqual, start_pos, state.get_position());
286 return true;
287 }
288 state.add_token(RTokenType::Not, start_pos, state.get_position());
289 return true;
290 }
291 '>' => {
292 state.advance(1);
293 if let Some('=') = state.current() {
294 state.advance(1);
295 state.add_token(RTokenType::GreaterEqual, start_pos, state.get_position());
296 return true;
297 }
298 state.add_token(RTokenType::Greater, start_pos, state.get_position());
299 return true;
300 }
301 '&' => {
302 state.advance(1);
303 if let Some('&') = state.current() {
304 state.advance(1);
305 state.add_token(RTokenType::AndAnd, start_pos, state.get_position());
306 return true;
307 }
308 state.add_token(RTokenType::And, start_pos, state.get_position());
309 return true;
310 }
311 '|' => {
312 state.advance(1);
313 if let Some('|') = state.current() {
314 state.advance(1);
315 state.add_token(RTokenType::OrOr, start_pos, state.get_position());
316 return true;
317 }
318 if let Some('>') = state.current() {
319 state.advance(1);
320 state.add_token(RTokenType::Pipe, start_pos, state.get_position());
321 return true;
322 }
323 state.add_token(RTokenType::Or, start_pos, state.get_position());
324 return true;
325 }
326 '%' => {
327 state.advance(1);
328 while let Some(c) = state.current() {
329 state.advance(c.len_utf8());
330 if c == '%' {
331 state.add_token(RTokenType::Operator, start_pos, state.get_position());
332 return true;
333 }
334 }
335 state.add_token(RTokenType::Operator, start_pos, state.get_position());
337 return true;
338 }
339 _ => {}
340 }
341 }
342 false
343 }
344
345 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
347 if let Some(ch) = state.current() {
348 let start_pos = state.get_position();
349 let kind = match ch {
350 '(' => Some(RTokenType::LeftParen),
351 ')' => Some(RTokenType::RightParen),
352 '[' => Some(RTokenType::LeftBracket),
353 ']' => Some(RTokenType::RightBracket),
354 '{' => Some(RTokenType::LeftBrace),
355 '}' => Some(RTokenType::RightBrace),
356 ',' => Some(RTokenType::Comma),
357 ';' => Some(RTokenType::Semicolon),
358 '+' => Some(RTokenType::Plus),
359 '*' => Some(RTokenType::Star),
360 '/' => Some(RTokenType::Slash),
361 '^' => Some(RTokenType::Caret),
362 '$' => Some(RTokenType::Dollar),
363 '@' => Some(RTokenType::At),
364 '~' => Some(RTokenType::Tilde),
365 ':' => {
366 state.advance(1);
367 if let Some(':') = state.current() {
368 state.advance(1);
369 if let Some(':') = state.current() {
370 state.advance(1);
371 Some(RTokenType::TripleColon)
372 }
373 else {
374 Some(RTokenType::DoubleColon)
375 }
376 }
377 else {
378 return {
379 state.add_token(RTokenType::Colon, start_pos, state.get_position());
380 true
381 };
382 }
383 }
384 '?' => Some(RTokenType::Question),
385 _ => None,
386 };
387
388 if let Some(k) = kind {
389 if !matches!(k, RTokenType::TripleColon | RTokenType::DoubleColon) {
390 state.advance(1);
391 }
392 state.add_token(k, start_pos, state.get_position());
393 return true;
394 }
395 }
396 false
397 }
398
399 fn lex_other<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
401 if let Some(ch) = state.current() {
402 let start_pos = state.get_position();
403 let len = ch.len_utf8();
404 state.advance(len);
405 state.add_token(RTokenType::Error, start_pos, state.get_position());
406 return true;
407 }
408 false
409 }
410}