1use crate::{kind::NimSyntaxKind, language::NimLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, lexer::LexOutput, source::Source};
3use std::borrow::Cow;
4
5type State<'s, S> = LexerState<'s, S, NimLanguage>;
6
7#[derive(Clone, Debug)]
8pub struct NimLexer<'config> {
9 _config: &'config NimLanguage,
10}
11
12impl<'config> NimLexer<'config> {
13 pub fn new(config: &'config NimLanguage) -> Self {
14 Self { _config: config }
15 }
16
17 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
19 let start_pos = state.get_position();
20
21 while let Some(ch) = state.peek() {
22 if ch == ' ' || ch == '\t' {
23 state.advance(ch.len_utf8());
24 }
25 else {
26 break;
27 }
28 }
29
30 if state.get_position() > start_pos {
31 state.add_token(NimSyntaxKind::Whitespace, start_pos, state.get_position());
32 true
33 }
34 else {
35 false
36 }
37 }
38
39 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
41 let start_pos = state.get_position();
42
43 if let Some('\n') = state.peek() {
44 state.advance(1);
45 state.add_token(NimSyntaxKind::Newline, start_pos, state.get_position());
46 true
47 }
48 else if let Some('\r') = state.peek() {
49 state.advance(1);
50 if let Some('\n') = state.peek() {
51 state.advance(1);
52 }
53 state.add_token(NimSyntaxKind::Newline, start_pos, state.get_position());
54 true
55 }
56 else {
57 false
58 }
59 }
60
61 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
63 let start_pos = state.get_position();
64
65 if let Some('#') = state.peek() {
66 state.advance(1);
67
68 if let Some('#') = state.peek() {
70 state.advance(1);
71 }
72
73 while let Some(ch) = state.peek() {
75 if ch == '\n' || ch == '\r' {
76 break;
77 }
78 state.advance(ch.len_utf8());
79 }
80
81 let kind = NimSyntaxKind::CommentToken;
82
83 state.add_token(kind, start_pos, state.get_position());
84 true
85 }
86 else {
87 false
88 }
89 }
90
91 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
93 let start_pos = state.get_position();
94
95 if let Some('"') = state.peek() {
96 state.advance(1);
97
98 while let Some(ch) = state.peek() {
99 if ch == '"' {
100 state.advance(1);
101 break;
102 }
103 if ch == '\\' {
104 state.advance(1);
105 if let Some(c) = state.peek() {
106 state.advance(c.len_utf8());
107 }
108 }
109 else {
110 state.advance(ch.len_utf8());
111 }
112 }
113
114 state.add_token(NimSyntaxKind::StringLiteral, start_pos, state.get_position());
115 true
116 }
117 else {
118 false
119 }
120 }
121
122 fn lex_char<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
124 let start_pos = state.get_position();
125
126 if let Some('\'') = state.peek() {
127 state.advance(1);
128
129 if let Some('\\') = state.peek() {
130 state.advance(1);
131 if let Some(c) = state.peek() {
132 state.advance(c.len_utf8());
133 }
134 }
135 else if let Some(c) = state.peek() {
136 if c != '\'' {
137 state.advance(c.len_utf8());
138 }
139 }
140
141 if let Some('\'') = state.peek() {
142 state.advance(1);
143 }
144
145 state.add_token(NimSyntaxKind::CharLiteral, start_pos, state.get_position());
146 true
147 }
148 else {
149 false
150 }
151 }
152
153 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
155 let start_pos = state.get_position();
156
157 if let Some(ch) = state.peek() {
158 if ch.is_ascii_digit() {
159 state.advance(ch.len_utf8());
160
161 while let Some(ch) = state.peek() {
162 if ch.is_ascii_digit() || ch == '_' {
163 state.advance(ch.len_utf8());
164 }
165 else {
166 break;
167 }
168 }
169
170 let mut is_float = false;
172 if let Some('.') = state.peek() {
173 state.advance(1);
174 is_float = true;
175 while let Some(ch) = state.peek() {
176 if ch.is_ascii_digit() || ch == '_' {
177 state.advance(ch.len_utf8());
178 }
179 else {
180 break;
181 }
182 }
183 }
184
185 let kind = if is_float { NimSyntaxKind::FloatLiteral } else { NimSyntaxKind::IntLiteral };
186 state.add_token(kind, start_pos, state.get_position());
187 true
188 }
189 else {
190 false
191 }
192 }
193 else {
194 false
195 }
196 }
197
198 fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
200 let start_pos = state.get_position();
201
202 if let Some(ch) = state.peek() {
203 if ch.is_alphabetic() || ch == '_' {
204 state.advance(ch.len_utf8());
205
206 while let Some(ch) = state.peek() {
207 if ch.is_alphanumeric() || ch == '_' {
208 state.advance(ch.len_utf8());
209 }
210 else {
211 break;
212 }
213 }
214
215 let text = state.get_text_in((start_pos..state.get_position()).into());
216 let kind = match text {
217 Cow::Borrowed("and") => NimSyntaxKind::AndKeyword,
218 Cow::Borrowed("or") => NimSyntaxKind::OrKeyword,
219 Cow::Borrowed("not") => NimSyntaxKind::NotKeyword,
220 Cow::Borrowed("if") => NimSyntaxKind::IfKeyword,
221 Cow::Borrowed("else") => NimSyntaxKind::ElseKeyword,
222 Cow::Borrowed("elif") => NimSyntaxKind::ElifKeyword,
223 Cow::Borrowed("while") => NimSyntaxKind::WhileKeyword,
224 Cow::Borrowed("for") => NimSyntaxKind::ForKeyword,
225 Cow::Borrowed("proc") => NimSyntaxKind::ProcKeyword,
226 Cow::Borrowed("func") => NimSyntaxKind::FuncKeyword,
227 Cow::Borrowed("var") => NimSyntaxKind::VarKeyword,
228 Cow::Borrowed("let") => NimSyntaxKind::LetKeyword,
229 Cow::Borrowed("const") => NimSyntaxKind::ConstKeyword,
230 Cow::Borrowed("type") => NimSyntaxKind::TypeKeyword,
231 Cow::Borrowed("import") => NimSyntaxKind::ImportKeyword,
232 Cow::Borrowed("from") => NimSyntaxKind::FromKeyword,
233 Cow::Borrowed("include") => NimSyntaxKind::IncludeKeyword,
234 Cow::Borrowed("return") => NimSyntaxKind::ReturnKeyword,
235 Cow::Borrowed("yield") => NimSyntaxKind::YieldKeyword,
236 Cow::Borrowed("break") => NimSyntaxKind::BreakKeyword,
237 Cow::Borrowed("continue") => NimSyntaxKind::ContinueKeyword,
238 Cow::Borrowed("try") => NimSyntaxKind::TryKeyword,
239 Cow::Borrowed("except") => NimSyntaxKind::ExceptKeyword,
240 Cow::Borrowed("finally") => NimSyntaxKind::FinallyKeyword,
241 Cow::Borrowed("raise") => NimSyntaxKind::RaiseKeyword,
242 Cow::Borrowed("case") => NimSyntaxKind::CaseKeyword,
243 Cow::Borrowed("of") => NimSyntaxKind::OfKeyword,
244 Cow::Borrowed("when") => NimSyntaxKind::WhenKeyword,
245 Cow::Borrowed("is") => NimSyntaxKind::IsKeyword,
246 Cow::Borrowed("in") => NimSyntaxKind::InKeyword,
247 Cow::Borrowed("nil") => NimSyntaxKind::NilKeyword,
248 _ => NimSyntaxKind::Identifier,
249 };
250
251 state.add_token(kind, start_pos, state.get_position());
252 true
253 }
254 else {
255 false
256 }
257 }
258 else {
259 false
260 }
261 }
262
263 fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
265 if let Some(ch) = state.peek() {
266 let start_pos = state.get_position();
267
268 match ch {
269 '+' => {
270 state.advance(1);
271 state.add_token(NimSyntaxKind::Plus, start_pos, state.get_position());
272 true
273 }
274 '-' => {
275 state.advance(1);
276 state.add_token(NimSyntaxKind::Minus, start_pos, state.get_position());
277 true
278 }
279 '*' => {
280 state.advance(1);
281 state.add_token(NimSyntaxKind::Star, start_pos, state.get_position());
282 true
283 }
284 '/' => {
285 state.advance(1);
286 state.add_token(NimSyntaxKind::Slash, start_pos, state.get_position());
287 true
288 }
289 '=' => {
290 state.advance(1);
291 if state.peek() == Some('=') {
292 state.advance(1);
293 state.add_token(NimSyntaxKind::EqualEqual, start_pos, state.get_position());
294 }
295 else {
296 state.add_token(NimSyntaxKind::Equal, start_pos, state.get_position());
297 }
298 true
299 }
300 '!' => {
301 state.advance(1);
302 if state.peek() == Some('=') {
303 state.advance(1);
304 state.add_token(NimSyntaxKind::NotEqual, start_pos, state.get_position());
305 }
306 else {
307 state.add_token(NimSyntaxKind::Error, start_pos, state.get_position());
308 }
309 true
310 }
311 '<' => {
312 state.advance(1);
313 if state.peek() == Some('=') {
314 state.advance(1);
315 state.add_token(NimSyntaxKind::LessEqual, start_pos, state.get_position());
316 }
317 else if state.peek() == Some('<') {
318 state.advance(1);
319 state.add_token(NimSyntaxKind::LeftShift, start_pos, state.get_position());
320 }
321 else {
322 state.add_token(NimSyntaxKind::Less, start_pos, state.get_position());
323 }
324 true
325 }
326 '>' => {
327 state.advance(1);
328 if state.peek() == Some('=') {
329 state.advance(1);
330 state.add_token(NimSyntaxKind::GreaterEqual, start_pos, state.get_position());
331 }
332 else if state.peek() == Some('>') {
333 state.advance(1);
334 state.add_token(NimSyntaxKind::RightShift, start_pos, state.get_position());
335 }
336 else {
337 state.add_token(NimSyntaxKind::Greater, start_pos, state.get_position());
338 }
339 true
340 }
341 '(' => {
342 state.advance(1);
343 state.add_token(NimSyntaxKind::LeftParen, start_pos, state.get_position());
344 true
345 }
346 ')' => {
347 state.advance(1);
348 state.add_token(NimSyntaxKind::RightParen, start_pos, state.get_position());
349 true
350 }
351 '[' => {
352 state.advance(1);
353 state.add_token(NimSyntaxKind::LeftBracket, start_pos, state.get_position());
354 true
355 }
356 ']' => {
357 state.advance(1);
358 state.add_token(NimSyntaxKind::RightBracket, start_pos, state.get_position());
359 true
360 }
361 '{' => {
362 state.advance(1);
363 state.add_token(NimSyntaxKind::LeftBrace, start_pos, state.get_position());
364 true
365 }
366 '}' => {
367 state.advance(1);
368 state.add_token(NimSyntaxKind::RightBrace, start_pos, state.get_position());
369 true
370 }
371 ',' => {
372 state.advance(1);
373 state.add_token(NimSyntaxKind::Comma, start_pos, state.get_position());
374 true
375 }
376 ';' => {
377 state.advance(1);
378 state.add_token(NimSyntaxKind::Semicolon, start_pos, state.get_position());
379 true
380 }
381 ':' => {
382 state.advance(1);
383 state.add_token(NimSyntaxKind::Colon, start_pos, state.get_position());
384 true
385 }
386 '.' => {
387 state.advance(1);
388 state.add_token(NimSyntaxKind::Dot, start_pos, state.get_position());
389 true
390 }
391 _ => false,
392 }
393 }
394 else {
395 false
396 }
397 }
398
399 pub fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
400 while state.not_at_end() {
401 if self.skip_whitespace(state) {
402 continue;
403 }
404
405 if self.lex_newline(state) {
406 continue;
407 }
408
409 if self.lex_comment(state) {
410 continue;
411 }
412
413 if self.lex_string(state) {
414 continue;
415 }
416
417 if self.lex_char(state) {
418 continue;
419 }
420
421 if self.lex_number(state) {
422 continue;
423 }
424
425 if self.lex_identifier(state) {
426 continue;
427 }
428
429 if self.lex_operator(state) {
430 continue;
431 }
432
433 let start_pos = state.get_position();
435 if let Some(ch) = state.peek() {
436 state.advance(ch.len_utf8());
437 state.add_token(NimSyntaxKind::Error, start_pos, state.get_position());
438 }
439 }
440 Ok(())
441 }
442}
443
444impl<'config> Lexer<NimLanguage> for NimLexer<'config> {
445 fn lex<'s, S: Source + ?Sized>(&self, source: &'s S, _edits: &[oak_core::source::TextEdit], cache: &'s mut impl LexerCache<NimLanguage>) -> LexOutput<NimLanguage> {
446 let mut state = LexerState::new(source);
447 let result = self.run(&mut state);
448 if result.is_ok() {
449 state.add_eof();
450 }
451 state.finish_with_cache(result, cache)
452 }
453}