1use crate::{kind::JsonSyntaxKind, language::JsonLanguage};
2use oak_core::{
3 errors::OakError,
4 lexer::{CommentConfig, LexOutput, Lexer, LexerCache, LexerState, StringConfig},
5 source::{Source, TextEdit},
6};
7use std::sync::LazyLock;
8
/// Lexer state specialised to [`JsonLanguage`]; `S` is the source type being lexed.
type State<'a, S> = LexerState<'a, S, JsonLanguage>;

/// Comment syntax scanned when the `comments` option is enabled: `//` line comments and non-nested `/* ... */` block comments.
static JSON_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: false });
/// String syntax scanned when the `single_quotes` option is enabled: `'`-delimited with `\` as the escape character.
static JSON_SINGLE_QUOTE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
13
/// Lexer for JSON source text, driven by a borrowed [`JsonLanguage`] configuration.
#[derive(Clone)]
pub struct JsonLexer<'config> {
    /// Language options consulted while lexing (`comments`, `single_quotes`, `bare_keys`, `hex_numbers`).
    _config: &'config JsonLanguage,
}
19
20impl<'config> Lexer<JsonLanguage> for JsonLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<JsonLanguage>) -> LexOutput<JsonLanguage> {
22 let mut state = State::new(source);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof();
26 }
27 state.finish_with_cache(result, cache)
28 }
29}
30
31impl<'config> JsonLexer<'config> {
/// Creates a lexer that borrows the given language configuration.
pub fn new(config: &'config JsonLanguage) -> Self {
    Self { _config: config }
}
35
36 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
37 while state.not_at_end() {
38 let safe_point = state.get_position();
39 let Some(ch) = state.peek()
40 else {
41 break;
42 };
43
44 match ch {
45 ' ' | '\t' | '\n' | '\r' => {
46 self.skip_whitespace_fast(state);
47 }
48 '"' => {
49 self.lex_string_fast(state);
50 }
51 '/' if self._config.comments => {
52 JSON_COMMENT.scan(state, JsonSyntaxKind::Comment, JsonSyntaxKind::Comment);
53 }
54 '-' | '0'..='9' => {
55 self.lex_number(state);
56 }
57 '{' | '}' | '[' | ']' | ',' | ':' => {
58 self.lex_operator_or_delimiter(state);
59 }
60 't' | 'f' | 'n' => {
61 if !self.lex_keyword(state) {
62 if self._config.bare_keys {
63 self.lex_bare_key(state);
64 }
65 }
66 }
67 '\'' if self._config.single_quotes => {
68 JSON_SINGLE_QUOTE_STRING.scan(state, JsonSyntaxKind::StringLiteral);
69 }
70 _ => {
71 let mut handled = false;
72 if self._config.bare_keys && (ch.is_alphabetic() || ch == '_' || ch == '$') {
73 handled = self.lex_bare_key(state);
74 }
75
76 if !handled {
77 state.advance(ch.len_utf8());
79 state.add_token(JsonSyntaxKind::Error, safe_point, state.get_position());
80 }
81 }
82 }
83
84 state.advance_if_dead_lock(safe_point);
85 }
86
87 Ok(())
88 }
89
90 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
92 let start_pos = state.get_position();
93
94 state.consume_if_starts_with("-");
96
97 let mut has_digits = false;
98
99 if self._config.hex_numbers && state.starts_with("0") {
101 let n1 = state.peek_next_n(1);
102 if n1 == Some('x') || n1 == Some('X') {
103 state.advance(2); let range = state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
105 if range.end > range.start {
106 state.add_token(JsonSyntaxKind::NumberLiteral, start_pos, state.get_position());
107 return true;
108 }
109 }
111 }
112
113 let r1 = state.take_while(|c| c.is_ascii_digit());
115 if r1.end > r1.start {
116 has_digits = true;
117 }
118
119 if state.consume_if_starts_with(".") {
121 let r2 = state.take_while(|c| c.is_ascii_digit());
122 if r2.end > r2.start {
123 has_digits = true;
124 }
125 }
126
127 if let Some(ch) = state.peek() {
129 if ch == 'e' || ch == 'E' {
130 state.advance(1);
131 if let Some(sign) = state.peek() {
132 if sign == '+' || sign == '-' {
133 state.advance(1);
134 }
135 }
136 state.take_while(|c| c.is_ascii_digit());
137 }
138 }
139
140 if has_digits {
141 state.add_token(JsonSyntaxKind::NumberLiteral, start_pos, state.get_position());
142 true
143 }
144 else {
145 false
146 }
147 }
148
149 fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
151 let start_pos = state.get_position();
152 if state.consume_if_starts_with("true") || state.consume_if_starts_with("false") {
153 state.add_token(JsonSyntaxKind::BooleanLiteral, start_pos, state.get_position());
154 return true;
155 }
156 if state.consume_if_starts_with("null") {
157 state.add_token(JsonSyntaxKind::NullLiteral, start_pos, state.get_position());
158 return true;
159 }
160 false
161 }
162
163 fn lex_bare_key<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
165 let start_pos = state.get_position();
166 if let Some(ch) = state.peek() {
167 if ch.is_alphabetic() || ch == '_' || ch == '$' {
168 state.advance(ch.len_utf8());
169 state.take_while(|c| c.is_alphanumeric() || c == '_' || c == '$');
170 state.add_token(JsonSyntaxKind::BareKey, start_pos, state.get_position());
171 return true;
172 }
173 }
174 false
175 }
176
177 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
179 let start_pos = state.get_position();
180 if let Some(ch) = state.peek() {
181 let token_kind = match ch {
182 '{' => JsonSyntaxKind::LeftBrace,
183 '}' => JsonSyntaxKind::RightBrace,
184 '[' => JsonSyntaxKind::LeftBracket,
185 ']' => JsonSyntaxKind::RightBracket,
186 ',' => JsonSyntaxKind::Comma,
187 ':' => JsonSyntaxKind::Colon,
188 _ => return false,
189 };
190
191 state.advance(ch.len_utf8());
192 state.add_token(token_kind, start_pos, state.get_position());
193 true
194 }
195 else {
196 false
197 }
198 }
199
200 fn skip_whitespace_fast<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
201 let start_pos = state.get_position();
202 let mut count = 0;
203 while let Some(ch) = state.peek() {
204 if ch.is_whitespace() {
205 state.advance(ch.len_utf8());
206 count += 1;
207 }
208 else {
209 break;
210 }
211 }
212
213 if count > 0 {
214 state.add_token(JsonSyntaxKind::Whitespace, start_pos, state.get_position());
215 true
216 }
217 else {
218 false
219 }
220 }
221
222 fn lex_string_fast<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
223 let start_pos = state.get_position();
224 if !state.consume_if_starts_with("\"") {
225 return false;
226 }
227
228 while let Some(ch) = state.peek() {
229 if ch == '"' {
230 state.advance(ch.len_utf8());
231 state.add_token(JsonSyntaxKind::StringLiteral, start_pos, state.get_position());
232 return true;
233 }
234 else if ch == '\\' {
235 state.advance(ch.len_utf8());
236 if let Some(escaped) = state.peek() {
237 state.advance(escaped.len_utf8());
238 }
239 }
240 else {
241 state.advance(ch.len_utf8());
242 }
243 }
244
245 state.add_token(JsonSyntaxKind::StringLiteral, start_pos, state.get_position());
246 true
247 }
248}