1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::JsonLanguage, lexer::token_type::JsonTokenType};
6use oak_core::{
7 errors::OakError,
8 lexer::{CommentConfig, LexOutput, Lexer, LexerCache, LexerState, StringConfig},
9 source::{Source, TextEdit},
10};
11use std::sync::LazyLock;
12
/// Shorthand for the shared lexer state, specialised to the JSON language.
pub(crate) type State<'a, S> = LexerState<'a, S, JsonLanguage>;

/// Shared comment configuration: `//` line comments and non-nesting `/* */` block comments.
static JSON_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: false });
/// Shared string configuration for single-quoted strings with `\`-escapes (used only when the dialect enables them).
static JSON_SINGLE_QUOTE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
17
/// A hand-written lexer for JSON and JSON-with-extensions sources.
///
/// Dialect toggles (comments, single-quoted strings, bare keys, hex numbers)
/// are read from the borrowed [`JsonLanguage`] configuration.
#[derive(Clone)]
pub struct JsonLexer<'config> {
    // Dialect options controlling which JSON extensions are recognised.
    config: &'config JsonLanguage,
}
23
24impl<'config> Lexer<JsonLanguage> for JsonLexer<'config> {
25 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<JsonLanguage>) -> LexOutput<JsonLanguage> {
26 let mut state = State::new(source);
27 let result = self.run(&mut state);
28 if result.is_ok() {
29 state.add_eof();
30 }
31 state.finish_with_cache(result, cache)
32 }
33}
34
35impl<'config> JsonLexer<'config> {
36 pub fn new(config: &'config JsonLanguage) -> Self {
38 Self { config }
39 }
40
41 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
42 while state.not_at_end() {
43 let safe_point = state.get_position();
44 let Some(ch) = state.peek()
45 else {
46 break;
47 };
48
49 match ch {
50 ' ' | '\t' | '\n' | '\r' => {
51 self.skip_whitespace_fast(state);
52 }
53 '"' => {
54 self.lex_string_fast(state);
55 }
56 '/' if self.config.comments => {
57 JSON_COMMENT.scan(state, JsonTokenType::Comment, JsonTokenType::Comment);
58 }
59 '-' | '0'..='9' => {
60 self.lex_number(state);
61 }
62 '{' | '}' | '[' | ']' | ',' | ':' => {
63 self.lex_operator_or_delimiter(state);
64 }
65 't' | 'f' | 'n' => {
66 if !self.lex_keyword(state) {
67 if self.config.bare_keys {
68 self.lex_bare_key(state);
69 }
70 }
71 }
72 '\'' if self.config.single_quotes => {
73 JSON_SINGLE_QUOTE_STRING.scan(state, JsonTokenType::StringLiteral);
74 }
75 _ => {
76 let mut handled = false;
77 if self.config.bare_keys && (ch.is_alphabetic() || ch == '_' || ch == '$') {
78 handled = self.lex_bare_key(state);
79 }
80
81 if !handled {
82 state.advance(ch.len_utf8());
84 state.add_token(JsonTokenType::Error, safe_point, state.get_position());
85 }
86 }
87 }
88
89 state.advance_if_dead_lock(safe_point);
90 }
91
92 Ok(())
93 }
94
95 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
97 let start_pos = state.get_position();
98
99 state.consume_if_starts_with("-");
101
102 let mut has_digits = false;
103
104 if self.config.hex_numbers && state.starts_with("0") {
106 let n1 = state.peek_next_n(1);
107 if n1 == Some('x') || n1 == Some('X') {
108 state.advance(2); let range = state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
110 if range.end > range.start {
111 state.add_token(JsonTokenType::NumberLiteral, start_pos, state.get_position());
112 return true;
113 }
114 }
116 }
117
118 let r1 = state.take_while(|c| c.is_ascii_digit());
120 if r1.end > r1.start {
121 has_digits = true;
122 }
123
124 if state.consume_if_starts_with(".") {
126 let r2 = state.take_while(|c| c.is_ascii_digit());
127 if r2.end > r2.start {
128 has_digits = true;
129 }
130 }
131
132 if let Some(ch) = state.peek() {
134 if ch == 'e' || ch == 'E' {
135 state.advance(1);
136 if let Some(sign) = state.peek() {
137 if sign == '+' || sign == '-' {
138 state.advance(1);
139 }
140 }
141 state.take_while(|c| c.is_ascii_digit());
142 }
143 }
144
145 if has_digits {
146 state.add_token(JsonTokenType::NumberLiteral, start_pos, state.get_position());
147 true
148 }
149 else {
150 false
151 }
152 }
153
154 fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156 let start_pos = state.get_position();
157 if state.consume_if_starts_with("true") || state.consume_if_starts_with("false") {
158 state.add_token(JsonTokenType::BooleanLiteral, start_pos, state.get_position());
159 return true;
160 }
161 if state.consume_if_starts_with("null") {
162 state.add_token(JsonTokenType::NullLiteral, start_pos, state.get_position());
163 return true;
164 }
165 false
166 }
167
168 fn lex_bare_key<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
170 let start_pos = state.get_position();
171 if let Some(ch) = state.peek() {
172 if ch.is_alphabetic() || ch == '_' || ch == '$' {
173 state.advance(ch.len_utf8());
174 state.take_while(|c| c.is_alphanumeric() || c == '_' || c == '$');
175 state.add_token(JsonTokenType::BareKey, start_pos, state.get_position());
176 return true;
177 }
178 }
179 false
180 }
181
182 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
184 let start_pos = state.get_position();
185 if let Some(ch) = state.peek() {
186 let token_kind = match ch {
187 '{' => JsonTokenType::LeftBrace,
188 '}' => JsonTokenType::RightBrace,
189 '[' => JsonTokenType::LeftBracket,
190 ']' => JsonTokenType::RightBracket,
191 ',' => JsonTokenType::Comma,
192 ':' => JsonTokenType::Colon,
193 _ => return false,
194 };
195
196 state.advance(ch.len_utf8());
197 state.add_token(token_kind, start_pos, state.get_position());
198 true
199 }
200 else {
201 false
202 }
203 }
204
205 fn skip_whitespace_fast<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
206 let start_pos = state.get_position();
207 let mut count = 0;
208 while let Some(ch) = state.peek() {
209 if ch.is_whitespace() {
210 state.advance(ch.len_utf8());
211 count += 1;
212 }
213 else {
214 break;
215 }
216 }
217 if count > 0 {
218 state.add_token(JsonTokenType::Whitespace, start_pos, state.get_position());
219 true
220 }
221 else {
222 false
223 }
224 }
225
226 fn lex_string_fast<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
227 let start_pos = state.get_position();
228 if !state.consume_if_starts_with("\"") {
229 return false;
230 }
231
232 let mut escaped = false;
233 while let Some(ch) = state.peek() {
234 state.advance(ch.len_utf8());
235 if escaped {
236 escaped = false;
237 continue;
238 }
239 if ch == '\\' {
240 escaped = true;
241 continue;
242 }
243 if ch == '"' {
244 state.add_token(JsonTokenType::StringLiteral, start_pos, state.get_position());
245 return true;
246 }
247 }
248 state.add_token(JsonTokenType::Error, start_pos, state.get_position());
250 false
251 }
252}