1use crate::{kind::JasminSyntaxKind, language::JasminLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
/// Shorthand for the `oak_core` lexer state specialised to [`JasminLanguage`];
/// `S` is the source type being scanned.
type State<'a, S> = LexerState<'a, S, JasminLanguage>;
5
/// Lexer for Jasmin (JVM assembler) source text.
///
/// The lexer borrows its language configuration for the lifetime `'config`.
#[derive(Clone)]
pub struct JasminLexer<'config> {
    /// Language configuration this lexer was created with. The underscore
    /// prefix marks it as not read by the code visible in this file.
    _config: &'config JasminLanguage,
}
11
12impl<'config> JasminLexer<'config> {
13 pub fn new(config: &'config JasminLanguage) -> Self {
14 Self { _config: config }
15 }
16
17 fn keyword_or_identifier(&self, text: &str) -> JasminSyntaxKind {
19 match text {
20 ".class" => JasminSyntaxKind::ClassKw,
21 ".version" => JasminSyntaxKind::VersionKw,
22 ".method" => JasminSyntaxKind::MethodKw,
23 ".field" => JasminSyntaxKind::FieldKw,
24 ".string" => JasminSyntaxKind::StringKw,
25 ".source" => JasminSyntaxKind::SourceFileKw,
26 ".stack" => JasminSyntaxKind::StackKw,
27 ".locals" => JasminSyntaxKind::LocalsKw,
28 ".end" => JasminSyntaxKind::EndKw,
29 ".compiled" => JasminSyntaxKind::CompiledKw,
30 ".from" => JasminSyntaxKind::FromKw,
31 ".inner" => JasminSyntaxKind::InnerClassKw,
32 ".nest" => JasminSyntaxKind::NestMembersKw,
33 ".bootstrap" => JasminSyntaxKind::BootstrapMethodKw,
34
35 "public" => JasminSyntaxKind::Public,
36 "private" => JasminSyntaxKind::Private,
37 "protected" => JasminSyntaxKind::Protected,
38 "static" => JasminSyntaxKind::Static,
39 "super" => JasminSyntaxKind::Super,
40 "final" => JasminSyntaxKind::Final,
41 "abstract" => JasminSyntaxKind::Abstract,
42 "synchronized" => JasminSyntaxKind::Synchronized,
43 "native" => JasminSyntaxKind::Native,
44 "synthetic" => JasminSyntaxKind::Synthetic,
45 "deprecated" => JasminSyntaxKind::Deprecated,
46 "varargs" => JasminSyntaxKind::Varargs,
47
48 "aload_0" => JasminSyntaxKind::ALoad0,
49 "aload_1" => JasminSyntaxKind::ALoad1,
50 "aload_2" => JasminSyntaxKind::ALoad2,
51 "aload_3" => JasminSyntaxKind::ALoad3,
52 "iload_0" => JasminSyntaxKind::ILoad0,
53 "iload_1" => JasminSyntaxKind::ILoad1,
54 "iload_2" => JasminSyntaxKind::ILoad2,
55 "iload_3" => JasminSyntaxKind::ILoad3,
56 "ldc" => JasminSyntaxKind::Ldc,
57 "ldc_w" => JasminSyntaxKind::LdcW,
58 "ldc2_w" => JasminSyntaxKind::Ldc2W,
59 "invokespecial" => JasminSyntaxKind::InvokeSpecial,
60 "invokevirtual" => JasminSyntaxKind::InvokeVirtual,
61 "invokestatic" => JasminSyntaxKind::InvokeStatic,
62 "invokeinterface" => JasminSyntaxKind::InvokeInterface,
63 "invokedynamic" => JasminSyntaxKind::InvokeDynamic,
64 "getstatic" => JasminSyntaxKind::GetStatic,
65 "putstatic" => JasminSyntaxKind::PutStatic,
66 "getfield" => JasminSyntaxKind::GetField,
67 "putfield" => JasminSyntaxKind::PutField,
68 "return" => JasminSyntaxKind::Return,
69 "areturn" => JasminSyntaxKind::AReturn,
70 "ireturn" => JasminSyntaxKind::IReturn,
71 "pop" => JasminSyntaxKind::Pop,
72 "new" => JasminSyntaxKind::New,
73
74 _ => {
75 if self.is_type_descriptor(text) { JasminSyntaxKind::TypeDescriptor } else { JasminSyntaxKind::IdentifierToken }
77 }
78 }
79 }
80
81 fn is_type_descriptor(&self, text: &str) -> bool {
83 if text.is_empty() {
84 return false;
85 }
86
87 if matches!(text, "B" | "C" | "D" | "F" | "I" | "J" | "S" | "Z" | "V") {
89 return true;
90 }
91
92 if text.starts_with('[') {
94 return true;
95 }
96
97 if text.starts_with('L') && text.ends_with(';') {
99 return true;
100 }
101
102 false
103 }
104
105 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
107 let start_pos = state.get_position();
108 let mut consumed = false;
109
110 while let Some(ch) = state.peek() {
111 if ch.is_whitespace() {
112 consumed = true;
113 state.advance(ch.len_utf8());
114 }
115 else {
116 break;
117 }
118 }
119
120 if consumed {
121 state.add_token(JasminSyntaxKind::Whitespace, start_pos, state.get_position());
122 true
123 }
124 else {
125 false
126 }
127 }
128
129 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
131 if let Some(ch) = state.peek() {
132 if ch == ';' {
133 let start_pos = state.get_position();
134 while let Some(ch) = state.peek() {
136 state.advance(ch.len_utf8());
137 if ch == '\n' {
138 break;
139 }
140 }
141 state.add_token(JasminSyntaxKind::Comment, start_pos, state.get_position());
142 return true;
143 }
144 }
145 false
146 }
147
148 fn lex_string<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
150 if let Some(ch) = state.peek() {
151 if ch == '"' {
152 let start_pos = state.get_position();
153 state.advance(1); while let Some(ch) = state.peek() {
156 if ch == '"' {
157 state.advance(1); break;
159 }
160 else if ch == '\\' {
161 state.advance(1); if state.peek().is_some() {
163 state.advance(1); }
165 }
166 else {
167 state.advance(ch.len_utf8());
168 }
169 }
170
171 state.add_token(JasminSyntaxKind::StringLiteral, start_pos, state.get_position());
172 return true;
173 }
174 }
175 false
176 }
177
178 fn lex_number<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
180 let start_pos = state.get_position();
181
182 if let Some(first) = state.peek() {
183 if !first.is_ascii_digit() {
185 return false;
186 }
187
188 while let Some(ch) = state.peek() {
190 if ch.is_ascii_digit() {
191 state.advance(ch.len_utf8());
192 }
193 else if ch == '.' {
194 state.advance(1);
196 while let Some(ch) = state.peek() {
197 if ch.is_ascii_digit() {
198 state.advance(ch.len_utf8());
199 }
200 else {
201 break;
202 }
203 }
204 break;
205 }
206 else {
207 break;
208 }
209 }
210
211 state.add_token(JasminSyntaxKind::Number, start_pos, state.get_position());
212 return true;
213 }
214
215 false
216 }
217
218 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
220 let start = state.get_position();
221 let first = match state.peek() {
222 Some(ch) => ch,
223 None => return false,
224 };
225
226 if !first.is_ascii_alphabetic() && first != '_' && first != '.' {
228 return false;
229 }
230
231 state.advance(first.len_utf8());
233
234 while let Some(ch) = state.peek() {
236 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '/' || ch == '$' || ch == '<' || ch == '>' {
237 state.advance(ch.len_utf8());
238 }
239 else {
240 break;
241 }
242 }
243
244 let end = state.get_position();
245 let text = state.get_text_in(oak_core::Range { start, end });
246 let kind = self.keyword_or_identifier(&text);
247 state.add_token(kind, start, state.get_position());
248 true
249 }
250
251 fn lex_operator_or_delimiter<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
253 let start = state.get_position();
254 let ch = match state.peek() {
255 Some(ch) => ch,
256 None => return false,
257 };
258
259 let kind = match ch {
260 '{' => JasminSyntaxKind::LeftBrace,
261 '}' => JasminSyntaxKind::RightBrace,
262 '(' => JasminSyntaxKind::LeftParen,
263 ')' => JasminSyntaxKind::RightParen,
264 '[' => JasminSyntaxKind::LeftBracket,
265 ']' => JasminSyntaxKind::RightBracket,
266 ':' => JasminSyntaxKind::Colon,
267 ';' => JasminSyntaxKind::Semicolon,
268 '.' => JasminSyntaxKind::Dot,
269 ',' => JasminSyntaxKind::Comma,
270 '/' => JasminSyntaxKind::Slash,
271 _ => return false,
272 };
273
274 state.advance(ch.len_utf8());
275 state.add_token(kind, start, state.get_position());
276 true
277 }
278
279 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
281 while state.not_at_end() {
282 let safe_point = state.get_position();
283
284 if self.skip_whitespace(state) {
286 continue;
287 }
288
289 if self.skip_comment(state) {
290 continue;
291 }
292
293 if self.lex_string(state) {
294 continue;
295 }
296
297 if self.lex_number(state) {
298 continue;
299 }
300
301 if self.lex_identifier_or_keyword(state) {
302 continue;
303 }
304
305 if self.lex_operator_or_delimiter(state) {
306 continue;
307 }
308
309 let start_pos = state.get_position();
311 if let Some(ch) = state.peek() {
312 state.advance(ch.len_utf8());
313 state.add_token(JasminSyntaxKind::Error, start_pos, state.get_position());
314 }
315
316 state.advance_if_dead_lock(safe_point);
317 }
318
319 Ok(())
320 }
321}
322
323impl<'config> Lexer<JasminLanguage> for JasminLexer<'config> {
324 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<JasminLanguage>) -> LexOutput<JasminLanguage> {
325 let mut state = LexerState::new(source);
326 let result = self.run(&mut state);
327 if result.is_ok() {
328 state.add_eof();
329 }
330 state.finish_with_cache(result, cache)
331 }
332}