1use crate::{kind::JsonSyntaxKind, language::JsonLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, SourceText, lexer::LexOutput, source::Source};
3
/// Shorthand for the framework lexer state specialized to the JSON language.
type State<S> = LexerState<S, JsonLanguage>;
5
/// A configurable lexer for JSON and JSON-superset dialects.
///
/// Borrows a [`JsonLanguage`] configuration whose flags enable optional
/// dialect features (comments, single-quoted strings, hex numbers, bare keys
/// — see the `self.config.*` checks in the lexing methods).
#[derive(Clone)]
pub struct JsonLexer<'config> {
    // Dialect configuration; read-only for the lexer's lifetime.
    config: &'config JsonLanguage,
}
11
12impl<'config> JsonLexer<'config> {
13 pub fn new(config: &'config JsonLanguage) -> Self {
14 Self { config }
15 }
16
17 pub fn tokenize_source(&self, source: &SourceText) -> LexOutput<JsonLanguage> {
19 self.lex(source)
20 }
21
22 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
24 let start_pos = state.get_position();
25
26 while let Some(ch) = state.peek() {
27 if ch == ' ' || ch == '\t' {
28 state.advance(ch.len_utf8());
29 }
30 else {
31 break;
32 }
33 }
34
35 if state.get_position() > start_pos {
36 state.add_token(JsonSyntaxKind::Whitespace, start_pos, state.get_position());
37 true
38 }
39 else {
40 false
41 }
42 }
43
44 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
46 let start_pos = state.get_position();
47
48 if let Some('\n') = state.peek() {
49 state.advance(1);
50 state.add_token(JsonSyntaxKind::Whitespace, start_pos, state.get_position());
51 true
52 }
53 else if let Some('\r') = state.peek() {
54 state.advance(1);
55 if let Some('\n') = state.peek() {
56 state.advance(1);
57 }
58 state.add_token(JsonSyntaxKind::Whitespace, start_pos, state.get_position());
59 true
60 }
61 else {
62 false
63 }
64 }
65
66 fn lex_comment<S: Source>(&self, state: &mut State<S>) -> bool {
68 if !self.config.comments {
69 return false;
70 }
71
72 let start_pos = state.get_position();
73
74 if let Some('/') = state.peek() {
75 let remaining_text = state.get_text_in((start_pos..state.length()).into());
77 if remaining_text.len() > 1 {
78 let next_ch = remaining_text.chars().nth(1).unwrap();
79 match next_ch {
80 '/' => {
81 state.advance(2); while let Some(ch) = state.peek() {
86 if ch == '\n' || ch == '\r' {
87 break;
88 }
89 state.advance(ch.len_utf8());
90 }
91
92 state.add_token(JsonSyntaxKind::Comment, start_pos, state.get_position());
93 return true;
94 }
95 '*' => {
96 state.advance(2); let mut closed = false;
99
100 while let Some(ch) = state.peek() {
101 if ch == '*' {
102 let current_pos = state.get_position();
103 let remaining = state.get_text_in((current_pos..state.length()).into());
104 if remaining.len() > 1 && remaining.chars().nth(1) == Some('/') {
105 state.advance(2); closed = true;
107 break;
108 }
109 }
110 state.advance(ch.len_utf8());
111 }
112
113 if !closed {
114 }
116
117 state.add_token(JsonSyntaxKind::Comment, start_pos, state.get_position());
118 return true;
119 }
120 _ => {}
121 }
122 }
123 }
124 false
125 }
126
127 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
129 let start_pos = state.get_position();
130
131 let quote_char = if let Some('"') = state.peek() {
132 '"'
133 }
134 else if self.config.single_quotes && matches!(state.peek(), Some('\'')) {
135 '\''
136 }
137 else {
138 return false;
139 };
140
141 state.advance(quote_char.len_utf8()); let mut escaped = false;
143
144 while let Some(ch) = state.peek() {
145 if escaped {
146 escaped = false;
147 state.advance(ch.len_utf8());
148 }
149 else if ch == '\\' {
150 escaped = true;
151 state.advance(ch.len_utf8());
152 }
153 else if ch == quote_char {
154 state.advance(ch.len_utf8()); break;
156 }
157 else if ch == '\n' || ch == '\r' {
158 break;
160 }
161 else {
162 state.advance(ch.len_utf8());
163 }
164 }
165
166 state.add_token(JsonSyntaxKind::StringLiteral, start_pos, state.get_position());
167 true
168 }
169
170 fn lex_number<S: Source>(&self, state: &mut State<S>) -> bool {
172 let start_pos = state.get_position();
173
174 if let Some('-') = state.peek() {
176 state.advance(1);
177 }
178
179 let mut has_digits = false;
180
181 if self.config.hex_numbers
183 && let Some('0') = state.peek()
184 {
185 if let Some(next_ch) = state.peek_next_n(1) {
186 if next_ch == 'x' || next_ch == 'X' {
187 state.advance(2); while let Some(ch) = state.peek() {
189 if ch.is_ascii_hexdigit() {
190 has_digits = true;
191 state.advance(1);
192 }
193 else {
194 break;
195 }
196 }
197
198 if has_digits {
199 state.add_token(JsonSyntaxKind::NumberLiteral, start_pos, state.get_position());
200 return true;
201 }
202 else {
203 state.set_position(start_pos);
205 return false;
206 }
207 }
208 }
209 }
210
211 while let Some(ch) = state.peek() {
213 if ch.is_ascii_digit() {
214 has_digits = true;
215 state.advance(1);
216 }
217 else {
218 break;
219 }
220 }
221
222 if let Some('.') = state.peek() {
224 state.advance(1);
225 while let Some(ch) = state.peek() {
226 if ch.is_ascii_digit() {
227 has_digits = true;
228 state.advance(1);
229 }
230 else {
231 break;
232 }
233 }
234 }
235
236 if let Some(ch) = state.peek() {
238 if ch == 'e' || ch == 'E' {
239 state.advance(1);
240 if let Some(sign) = state.peek() {
241 if sign == '+' || sign == '-' {
242 state.advance(1);
243 }
244 }
245 while let Some(digit) = state.peek() {
246 if digit.is_ascii_digit() {
247 state.advance(1);
248 }
249 else {
250 break;
251 }
252 }
253 }
254 }
255
256 if has_digits && state.get_position() > start_pos {
257 state.add_token(JsonSyntaxKind::NumberLiteral, start_pos, state.get_position());
258 true
259 }
260 else {
261 state.set_position(start_pos);
263 false
264 }
265 }
266
267 fn lex_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
269 let start_pos = state.get_position();
270
271 if start_pos + 4 <= state.length() {
273 let text = state.get_text_in((start_pos..start_pos + 4).into());
274 if text == "true" {
275 state.advance(4);
276 state.add_token(JsonSyntaxKind::BooleanLiteral, start_pos, state.get_position());
277 return true;
278 }
279 }
280
281 if start_pos + 5 <= state.length() {
283 let text = state.get_text_in((start_pos..start_pos + 5).into());
284 if text == "false" {
285 state.advance(5);
286 state.add_token(JsonSyntaxKind::BooleanLiteral, start_pos, state.get_position());
287 return true;
288 }
289 }
290
291 if start_pos + 4 <= state.length() {
293 let text = state.get_text_in((start_pos..start_pos + 4).into());
294 if text == "null" {
295 state.advance(4);
296 state.add_token(JsonSyntaxKind::NullLiteral, start_pos, state.get_position());
297 return true;
298 }
299 }
300
301 false
302 }
303
304 fn lex_bare_key<S: Source>(&self, state: &mut State<S>) -> bool {
306 if !self.config.bare_keys {
307 return false;
308 }
309
310 let start_pos = state.get_position();
311
312 if let Some(ch) = state.peek() {
313 if ch.is_alphabetic() || ch == '_' || ch == '$' {
314 state.advance(ch.len_utf8());
315
316 while let Some(ch) = state.peek() {
318 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
319 state.advance(ch.len_utf8());
320 }
321 else {
322 break;
323 }
324 }
325
326 state.add_token(JsonSyntaxKind::BareKey, start_pos, state.get_position());
327 true
328 }
329 else {
330 false
331 }
332 }
333 else {
334 false
335 }
336 }
337
338 fn lex_operator_or_delimiter<S: Source>(&self, state: &mut State<S>) -> bool {
340 let start_pos = state.get_position();
341
342 if let Some(ch) = state.peek() {
343 let token_kind = match ch {
344 '{' => JsonSyntaxKind::LeftBrace,
345 '}' => JsonSyntaxKind::RightBrace,
346 '[' => JsonSyntaxKind::LeftBracket,
347 ']' => JsonSyntaxKind::RightBracket,
348 ',' => JsonSyntaxKind::Comma,
349 ':' => JsonSyntaxKind::Colon,
350 _ => return false,
351 };
352
353 state.advance(ch.len_utf8());
354 state.add_token(token_kind, start_pos, state.get_position());
355 true
356 }
357 else {
358 false
359 }
360 }
361}
362
363impl<'config> Lexer<JsonLanguage> for JsonLexer<'config> {
364 fn lex_incremental(
365 &self,
366 source: impl Source,
367 _start_offset: usize,
368 _cache: IncrementalCache<'_, JsonLanguage>,
369 ) -> LexOutput<JsonLanguage> {
370 let mut state = LexerState::new_with_cache(source, _start_offset, _cache);
371 let result = self.run(&mut state);
372 state.finish(result)
373 }
374
375 fn lex(&self, source: impl Source) -> LexOutput<JsonLanguage> {
376 let mut state = LexerState::new(source);
377 let result = self.run(&mut state);
378 state.finish(result)
379 }
380}
381
382impl<'config> JsonLexer<'config> {
383 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
384 while state.not_at_end() {
385 if self.skip_whitespace(state) {
387 continue;
388 }
389
390 if self.lex_newline(state) {
391 continue;
392 }
393
394 if self.lex_comment(state) {
395 continue;
396 }
397
398 if self.lex_string_literal(state) {
399 continue;
400 }
401
402 if self.lex_number(state) {
403 continue;
404 }
405
406 if self.lex_keyword(state) {
407 continue;
408 }
409
410 if self.lex_bare_key(state) {
411 continue;
412 }
413
414 if self.lex_operator_or_delimiter(state) {
415 continue;
416 }
417
418 let start_pos = state.get_position();
420 if let Some(ch) = state.peek() {
421 state.advance(ch.len_utf8());
422 state.add_token(JsonSyntaxKind::Error, start_pos, state.get_position());
423 }
424 else {
425 break;
426 }
427 }
428
429 let eof_pos = state.get_position();
431 state.add_token(JsonSyntaxKind::Eof, eof_pos, eof_pos);
432
433 Ok(())
434 }
435}