1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::JsonLanguage, lexer::token_type::JsonTokenType};
5use oak_core::{
6 errors::OakError,
7 lexer::{CommentConfig, LexOutput, Lexer, LexerCache, LexerState, StringConfig},
8 source::{Source, TextEdit},
9};
10use std::sync::LazyLock;
11
/// Convenience alias: the shared lexer state specialised to [`JsonLanguage`].
type State<'a, S> = LexerState<'a, S, JsonLanguage>;

/// Comment syntax used when `JsonLanguage::comments` is enabled:
/// `//` line comments and non-nesting `/* ... */` block comments.
static JSON_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "/*", block_end: "*/", nested_blocks: false });
/// String syntax used when `JsonLanguage::single_quotes` is enabled:
/// `'...'`-quoted strings with `\` as the escape character.
static JSON_SINGLE_QUOTE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: Some('\\') });
16
/// Lexer for JSON, with optional extensions (comments, single-quoted strings,
/// hex numbers, bare keys) gated by flags on the borrowed [`JsonLanguage`].
#[derive(Clone)]
pub struct JsonLexer<'config> {
    // Language flags consulted per-character in `run` to enable extensions.
    config: &'config JsonLanguage,
}
22
23impl<'config> Lexer<JsonLanguage> for JsonLexer<'config> {
24 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<JsonLanguage>) -> LexOutput<JsonLanguage> {
25 let mut state = State::new(source);
26 let result = self.run(&mut state);
27 if result.is_ok() {
28 state.add_eof();
29 }
30 state.finish_with_cache(result, cache)
31 }
32}
33
34impl<'config> JsonLexer<'config> {
35 pub fn new(config: &'config JsonLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 let safe_point = state.get_position();
42 let Some(ch) = state.peek()
43 else {
44 break;
45 };
46
47 match ch {
48 ' ' | '\t' | '\n' | '\r' => {
49 self.skip_whitespace_fast(state);
50 }
51 '"' => {
52 self.lex_string_fast(state);
53 }
54 '/' if self.config.comments => {
55 JSON_COMMENT.scan(state, JsonTokenType::Comment, JsonTokenType::Comment);
56 }
57 '-' | '0'..='9' => {
58 self.lex_number(state);
59 }
60 '{' | '}' | '[' | ']' | ',' | ':' => {
61 self.lex_operator_or_delimiter(state);
62 }
63 't' | 'f' | 'n' => {
64 if !self.lex_keyword(state) {
65 if self.config.bare_keys {
66 self.lex_bare_key(state);
67 }
68 }
69 }
70 '\'' if self.config.single_quotes => {
71 JSON_SINGLE_QUOTE_STRING.scan(state, JsonTokenType::StringLiteral);
72 }
73 _ => {
74 let mut handled = false;
75 if self.config.bare_keys && (ch.is_alphabetic() || ch == '_' || ch == '$') {
76 handled = self.lex_bare_key(state);
77 }
78
79 if !handled {
80 state.advance(ch.len_utf8());
82 state.add_token(JsonTokenType::Error, safe_point, state.get_position());
83 }
84 }
85 }
86
87 state.advance_if_dead_lock(safe_point);
88 }
89
90 Ok(())
91 }
92
93 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
95 let start_pos = state.get_position();
96
97 state.consume_if_starts_with("-");
99
100 let mut has_digits = false;
101
102 if self.config.hex_numbers && state.starts_with("0") {
104 let n1 = state.peek_next_n(1);
105 if n1 == Some('x') || n1 == Some('X') {
106 state.advance(2); let range = state.take_while(|c| c.is_ascii_hexdigit() || c == '_');
108 if range.end > range.start {
109 state.add_token(JsonTokenType::NumberLiteral, start_pos, state.get_position());
110 return true;
111 }
112 }
114 }
115
116 let r1 = state.take_while(|c| c.is_ascii_digit());
118 if r1.end > r1.start {
119 has_digits = true;
120 }
121
122 if state.consume_if_starts_with(".") {
124 let r2 = state.take_while(|c| c.is_ascii_digit());
125 if r2.end > r2.start {
126 has_digits = true;
127 }
128 }
129
130 if let Some(ch) = state.peek() {
132 if ch == 'e' || ch == 'E' {
133 state.advance(1);
134 if let Some(sign) = state.peek() {
135 if sign == '+' || sign == '-' {
136 state.advance(1);
137 }
138 }
139 state.take_while(|c| c.is_ascii_digit());
140 }
141 }
142
143 if has_digits {
144 state.add_token(JsonTokenType::NumberLiteral, start_pos, state.get_position());
145 true
146 }
147 else {
148 false
149 }
150 }
151
152 fn lex_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
154 let start_pos = state.get_position();
155 if state.consume_if_starts_with("true") || state.consume_if_starts_with("false") {
156 state.add_token(JsonTokenType::BooleanLiteral, start_pos, state.get_position());
157 return true;
158 }
159 if state.consume_if_starts_with("null") {
160 state.add_token(JsonTokenType::NullLiteral, start_pos, state.get_position());
161 return true;
162 }
163 false
164 }
165
166 fn lex_bare_key<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
168 let start_pos = state.get_position();
169 if let Some(ch) = state.peek() {
170 if ch.is_alphabetic() || ch == '_' || ch == '$' {
171 state.advance(ch.len_utf8());
172 state.take_while(|c| c.is_alphanumeric() || c == '_' || c == '$');
173 state.add_token(JsonTokenType::BareKey, start_pos, state.get_position());
174 return true;
175 }
176 }
177 false
178 }
179
180 fn lex_operator_or_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
182 let start_pos = state.get_position();
183 if let Some(ch) = state.peek() {
184 let token_kind = match ch {
185 '{' => JsonTokenType::LeftBrace,
186 '}' => JsonTokenType::RightBrace,
187 '[' => JsonTokenType::LeftBracket,
188 ']' => JsonTokenType::RightBracket,
189 ',' => JsonTokenType::Comma,
190 ':' => JsonTokenType::Colon,
191 _ => return false,
192 };
193
194 state.advance(ch.len_utf8());
195 state.add_token(token_kind, start_pos, state.get_position());
196 true
197 }
198 else {
199 false
200 }
201 }
202
203 fn skip_whitespace_fast<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
204 let start_pos = state.get_position();
205 let mut count = 0;
206 while let Some(ch) = state.peek() {
207 if ch.is_whitespace() {
208 state.advance(ch.len_utf8());
209 count += 1;
210 }
211 else {
212 break;
213 }
214 }
215 if count > 0 {
216 state.add_token(JsonTokenType::Whitespace, start_pos, state.get_position());
217 true
218 }
219 else {
220 false
221 }
222 }
223
224 fn lex_string_fast<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
225 let start_pos = state.get_position();
226 if !state.consume_if_starts_with("\"") {
227 return false;
228 }
229
230 let mut escaped = false;
231 while let Some(ch) = state.peek() {
232 state.advance(ch.len_utf8());
233 if escaped {
234 escaped = false;
235 continue;
236 }
237 if ch == '\\' {
238 escaped = true;
239 continue;
240 }
241 if ch == '"' {
242 state.add_token(JsonTokenType::StringLiteral, start_pos, state.get_position());
243 return true;
244 }
245 }
246 state.add_token(JsonTokenType::Error, start_pos, state.get_position());
248 false
249 }
250}