1#![doc = include_str!("readme.md")]
2
3use oak_core::{
4 Lexer, LexerState, Source, TextEdit,
5 lexer::{LexOutput, LexerCache},
6};
7
8pub mod token_type;
10use crate::language::VonLanguage;
11pub use token_type::{VonToken, VonTokenType};
12
/// Shorthand for the framework lexer state specialized to [`VonLanguage`].
pub(crate) type State<'a, S> = LexerState<'a, S, VonLanguage>;
14
/// Lexer for the Von language.
///
/// Borrows the language configuration for `'config`; construct via
/// [`VonLexer::new`].
#[derive(Clone, Debug)]
pub struct VonLexer<'config> {
    // Language configuration this lexer was constructed with.
    // NOTE(review): none of the sub-lexers currently read it — confirm it is
    // intended for future configurable behavior.
    config: &'config VonLanguage,
}
20
impl<'config> VonLexer<'config> {
    /// Creates a new lexer borrowing the given language configuration.
    pub fn new(config: &'config VonLanguage) -> Self {
        Self { config }
    }

    /// Consumes a maximal run of spaces and tabs and emits one `Whitespace`
    /// token covering the whole run.
    ///
    /// Returns `true` if at least one character was consumed. Line breaks are
    /// deliberately excluded here; they are handled by `lex_newline` so that
    /// line structure survives as distinct tokens.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        while let Some(ch) = state.peek() {
            if ch == ' ' || ch == '\t' {
                state.advance(ch.len_utf8());
            }
            else {
                break;
            }
        }

        if state.get_position() > start_pos {
            state.add_token(VonTokenType::Whitespace, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

    /// Consumes a single line break — `\n`, `\r\n`, or a lone `\r` — and emits
    /// one `Newline` token for it.
    ///
    /// Returns `true` if a line break was consumed.
    fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('\n') = state.peek() {
            state.advance(1);
            state.add_token(VonTokenType::Newline, start_pos, state.get_position());
            true
        }
        else if let Some('\r') = state.peek() {
            state.advance(1);
            // Fold a following '\n' into the same token so "\r\n" is one Newline.
            if let Some('\n') = state.peek() {
                state.advance(1);
            }
            state.add_token(VonTokenType::Newline, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }

    /// Consumes a `#` line comment up to (but not including) the next line
    /// break, and emits a `Comment` token.
    ///
    /// Returns `true` if a comment was consumed.
    fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some('#') = state.peek() {
            state.advance(1);

            // Everything until '\n' or '\r' belongs to the comment; the line
            // break itself is left for lex_newline.
            while let Some(ch) = state.peek() {
                if ch == '\n' || ch == '\r' {
                    break;
                }
                state.advance(ch.len_utf8());
            }

            state.add_token(VonTokenType::Comment, start_pos, state.get_position());
            return true;
        }
        false
    }

    /// Lexes a string literal and emits a `StringLiteral` token.
    ///
    /// Supported forms (with `'` or `"` as the quote character):
    /// - plain strings, where `\` escapes the next character;
    /// - multi-quote strings (e.g. `"""..."""`), closed by the same number of
    ///   consecutive quotes that opened them, with no escape processing;
    /// - `raw`-prefixed strings (`raw"..."`), which disable escapes entirely.
    ///
    /// An unterminated string consumes the rest of the input and is emitted as
    /// an `Error` token (still returning `true`). Returns `false` with the
    /// position restored when the cursor is not at a string at all.
    fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start = state.get_position();

        // Detect the literal prefix `raw` immediately followed by a quote.
        let mut is_raw = false;
        if let Some('r') = state.peek() {
            if let Some('a') = state.peek_next_n(1) {
                if let Some('w') = state.peek_next_n(2) {
                    if let Some(c) = state.peek_next_n(3) {
                        if c == '"' || c == '\'' {
                            is_raw = true;
                        }
                    }
                }
            }
        }

        // The quote character sits right after `raw` (offset 3) or at the cursor.
        let quote = if is_raw {
            state.peek_next_n(3).unwrap()
        }
        else {
            match state.peek() {
                Some(c) if c == '"' || c == '\'' => c,
                _ => return false,
            }
        };

        if is_raw {
            // Skip over the `raw` prefix itself.
            state.advance(3);
        }

        // Count the run of opening quotes; the closer must repeat the quote
        // character the same number of times.
        let mut quote_count = 0;
        while let Some(c) = state.peek() {
            if c == quote {
                quote_count += 1;
                state.advance(c.len_utf8());
            }
            else {
                break;
            }
        }

        // Exactly two quotes is a complete empty string (open + close).
        if quote_count == 2 {
            state.add_token(VonTokenType::StringLiteral, start, state.get_position());
            return true;
        }

        if quote_count == 0 {
            // No opening quote after all; rewind and let other lexers try.
            state.set_position(start);
            return false;
        }

        // Scan the body until `quote_count` consecutive closing quotes appear.
        let mut current_consecutive = 0;
        let mut escaped = false;

        while let Some(c) = state.peek() {
            if !is_raw && escaped {
                // The character after a backslash is taken literally.
                escaped = false;
                state.advance(c.len_utf8());
                current_consecutive = 0;
                continue;
            }

            // Backslash escapes apply only to plain, single-quote-run strings;
            // multi-quote and raw strings take '\' literally.
            if !is_raw && c == '\\' && quote_count == 1 {
                escaped = true;
                state.advance(1);
                current_consecutive = 0;
                continue;
            }

            if c == quote {
                current_consecutive += 1;
                state.advance(c.len_utf8());
                if current_consecutive == quote_count {
                    state.add_token(VonTokenType::StringLiteral, start, state.get_position());
                    return true;
                }
            }
            else {
                // Any non-quote character breaks the closing run.
                current_consecutive = 0;
                state.advance(c.len_utf8());
            }
        }

        // Ran off the end of input without a closer: unterminated string.
        state.add_token(VonTokenType::Error, start, state.get_position());
        true
    }

    /// Lexes a number literal and emits a `NumberLiteral` token.
    ///
    /// Accepts an optional leading `-`, an integer part, an optional fraction
    /// and an optional exponent; `_` is allowed as a digit separator. A number
    /// immediately followed by a letter or `_` (e.g. `1x`) is rejected
    /// wholesale: the position is restored and `false` is returned.
    fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            // A number starts with a digit, or with '-'/'.' directly followed by a digit.
            let is_number_start = ch.is_ascii_digit() || (ch == '-' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit())) || (ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()));

            if !is_number_start {
                return false;
            }

            if ch == '-' {
                state.advance(1);
            }

            // Integer part: digits with '_' separators.
            if let Some(first) = state.peek() {
                if first.is_ascii_digit() {
                    while let Some(digit) = state.peek() {
                        if digit.is_ascii_digit() || digit == '_' {
                            state.advance(1);
                        }
                        else {
                            break;
                        }
                    }
                }
            }

            // Fractional part: consume '.' only when a digit follows it
            // (possibly after '_' separators), so "1." lexes as just "1".
            if let Some('.') = state.peek() {
                let mut lookahead = 1;
                while let Some(c) = state.peek_next_n(lookahead) {
                    if c == '_' {
                        lookahead += 1;
                    }
                    else {
                        break;
                    }
                }
                if let Some(next_ch) = state.peek_next_n(lookahead) {
                    if next_ch.is_ascii_digit() {
                        state.advance(1);
                        while let Some(digit) = state.peek() {
                            if digit.is_ascii_digit() || digit == '_' {
                                state.advance(1);
                            }
                            else {
                                break;
                            }
                        }
                    }
                }
            }

            // Exponent: 'e'/'E', optional sign, then digits. Consumed only
            // when lookahead confirms at least one digit follows, so "1e" or
            // "1ex" does not swallow the 'e'.
            if let Some(e) = state.peek() {
                if e == 'e' || e == 'E' {
                    let mut lookahead = 1;
                    if let Some(sign) = state.peek_next_n(lookahead) {
                        if sign == '+' || sign == '-' {
                            lookahead += 1;
                        }
                    }

                    let has_digits = state.peek_next_n(lookahead).map_or(false, |c| c.is_ascii_digit() || (c == '_' && state.peek_next_n(lookahead + 1).map_or(false, |n| n.is_ascii_digit())));

                    if has_digits {
                        state.advance(1);
                        if let Some(sign) = state.peek() {
                            if sign == '+' || sign == '-' {
                                state.advance(1);
                            }
                        }

                        while let Some(digit) = state.peek() {
                            if digit.is_ascii_digit() || digit == '_' {
                                state.advance(1);
                            }
                            else {
                                break;
                            }
                        }
                    }
                }
            }

            if state.get_position() > start_pos {
                // Reject numbers glued to identifier characters (e.g. "1x"):
                // roll back entirely so the caller can emit an Error instead.
                if let Some(next) = state.peek() {
                    if next.is_ascii_alphabetic() || next == '_' {
                        state.set_position(start_pos);
                        return false;
                    }
                }
                state.add_token(VonTokenType::NumberLiteral, start_pos, state.get_position());
                return true;
            }
            false
        }
        else {
            false
        }
    }

    /// Lexes an ASCII identifier (`[A-Za-z_][A-Za-z0-9_]*`) and emits either a
    /// keyword-literal token (`true`/`false` → `BoolLiteral`, `null` →
    /// `NullLiteral`) or an `Identifier` token.
    ///
    /// Returns `false` without consuming anything when the cursor is at a
    /// `raw"`/`raw'` prefix, so the string lexer can claim it instead.
    fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            if ch.is_ascii_alphabetic() || ch == '_' {
                // Defer `raw"..."` / `raw'...'` to lex_string.
                if ch == 'r' {
                    if let Some('a') = state.peek_next_n(1) {
                        if let Some('w') = state.peek_next_n(2) {
                            if let Some(c) = state.peek_next_n(3) {
                                if c == '"' || c == '\'' {
                                    return false;
                                }
                            }
                        }
                    }
                }

                while let Some(ch) = state.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '_' {
                        state.advance(ch.len_utf8());
                    }
                    else {
                        break;
                    }
                }

                // Classify the consumed text as keyword literal or identifier.
                let text = state.get_text_in((start_pos..state.get_position()).into());
                let token_kind = match text.as_ref() {
                    "true" | "false" => VonTokenType::BoolLiteral,
                    "null" => VonTokenType::NullLiteral,
                    _ => VonTokenType::Identifier,
                };

                state.add_token(token_kind, start_pos, state.get_position());
                return true;
            }
        }
        false
    }

    /// Lexes a single-character punctuation token (`[ ] { } , : =`) and emits
    /// the corresponding token type.
    ///
    /// Returns `false` for any other character without consuming input.
    fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        let start_pos = state.get_position();

        if let Some(ch) = state.peek() {
            let token_kind = match ch {
                '[' => {
                    state.advance(1);
                    VonTokenType::LeftBracket
                }
                ']' => {
                    state.advance(1);
                    VonTokenType::RightBracket
                }
                '{' => {
                    state.advance(1);
                    VonTokenType::LeftBrace
                }
                '}' => {
                    state.advance(1);
                    VonTokenType::RightBrace
                }
                ',' => {
                    state.advance(1);
                    VonTokenType::Comma
                }
                ':' => {
                    state.advance(1);
                    VonTokenType::Colon
                }
                '=' => {
                    state.advance(1);
                    VonTokenType::Eq
                }
                _ => return false,
            };
            state.add_token(token_kind, start_pos, state.get_position());
            true
        }
        else {
            false
        }
    }
}
383
384impl<'config> Lexer<VonLanguage> for VonLexer<'config> {
385 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], _cache: &'a mut impl LexerCache<VonLanguage>) -> LexOutput<VonLanguage> {
386 let mut state = State::new(source);
387 while state.not_at_end() {
388 if self.skip_whitespace(&mut state) {
389 continue;
390 }
391 if self.lex_newline(&mut state) {
392 continue;
393 }
394 if self.lex_comment(&mut state) {
395 continue;
396 }
397 if self.lex_identifier_or_keyword(&mut state) {
398 continue;
399 }
400 if self.lex_number(&mut state) {
401 continue;
402 }
403 if self.lex_string(&mut state) {
404 continue;
405 }
406 if self.lex_operator(&mut state) {
407 continue;
408 }
409
410 let start_pos = state.get_position();
412 if let Some(ch) = state.peek() {
413 state.advance(ch.len_utf8());
414 state.add_token(VonTokenType::Error, start_pos, state.get_position());
415 }
416 else {
417 break;
418 }
419 }
420
421 state.finish(Ok(()))
422 }
423}