1use crate::{language::JasmLanguage, syntax::JasmSyntaxKind};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<S> = LexerState<S, JasmLanguage>;
10
11static JASM_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static JASM_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["//"] });
13static JASM_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct JasmLexer<'config> {
17 config: &'config JasmLanguage,
18}
19
20impl<'config> Lexer<JasmLanguage> for JasmLexer<'config> {
21 fn lex_incremental(
22 &self,
23 source: impl Source,
24 changed: usize,
25 cache: IncrementalCache<JasmLanguage>,
26 ) -> LexOutput<JasmLanguage> {
27 let mut state = LexerState::new_with_cache(source, changed, cache);
28 let result = self.run(&mut state);
29 state.finish(result)
30 }
31}
32
33impl<'config> JasmLexer<'config> {
34 pub fn new(config: &'config JasmLanguage) -> Self {
35 Self { config }
36 }
37
38 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 let safe_point = state.get_position();
42
43 if self.skip_whitespace(state) {
44 continue;
45 }
46
47 if self.lex_newline(state) {
48 continue;
49 }
50
51 if self.skip_comment(state) {
52 continue;
53 }
54
55 if self.lex_string_literal(state) {
56 continue;
57 }
58
59 if self.lex_number_literal(state) {
60 continue;
61 }
62
63 if self.lex_identifier_or_keyword(state) {
64 continue;
65 }
66
67 if self.lex_punctuation(state) {
68 continue;
69 }
70
71 state.safe_check(safe_point);
72 }
73
74 let eof_pos = state.get_position();
76 state.add_token(JasmSyntaxKind::Eof, eof_pos, eof_pos);
77 Ok(())
78 }
79
80 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
82 let start = state.get_position();
83
84 while let Some(ch) = state.peek() {
85 if ch == ' ' || ch == '\t' || ch == '\r' {
86 state.advance(ch.len_utf8());
87 }
88 else {
89 break;
90 }
91 }
92
93 if state.get_position() > start {
94 state.add_token(JasmSyntaxKind::Whitespace, start, state.get_position());
95 return true;
96 }
97 false
98 }
99
100 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
102 let start = state.get_position();
103
104 if state.current() == Some('\n') {
105 state.advance(1);
106 state.add_token(JasmSyntaxKind::Newline, start, state.get_position());
107 return true;
108 }
109 false
110 }
111
112 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
114 let start = state.get_position();
115 let rest = state.rest();
116
117 if rest.starts_with("//") {
118 state.advance(2);
120
121 while let Some(ch) = state.peek() {
123 if ch != '\n' {
124 state.advance(ch.len_utf8());
125 }
126 else {
127 break;
128 }
129 }
130
131 state.add_token(JasmSyntaxKind::Comment, start, state.get_position());
132 return true;
133 }
134 false
135 }
136
137 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
139 let start = state.get_position();
140
141 if state.current() == Some('"') {
142 state.advance(1);
144
145 while let Some(ch) = state.peek() {
146 if ch != '"' {
147 if ch == '\\' {
148 state.advance(1); if let Some(_) = state.peek() {
150 state.advance(1); }
152 }
153 else {
154 state.advance(ch.len_utf8());
155 }
156 }
157 else {
158 state.advance(1);
160 break;
161 }
162 }
163
164 state.add_token(JasmSyntaxKind::StringLiteral, start, state.get_position());
165 return true;
166 }
167 false
168 }
169
170 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
172 let start = state.get_position();
173 let first = match state.current() {
174 Some(c) => c,
175 None => return false,
176 };
177
178 if !first.is_ascii_digit() && first != '-' && first != '+' {
180 return false;
181 }
182
183 if first == '-' || first == '+' {
185 if let Some(next) = state.peek_next_n(1) {
186 if !next.is_ascii_digit() {
187 return false;
188 }
189 }
190 else {
191 return false;
192 }
193 }
194
195 state.advance(1);
196 let mut has_dot = false;
197 let mut has_exp = false;
198
199 while let Some(ch) = state.peek() {
200 if ch.is_ascii_digit() {
201 state.advance(1);
202 }
203 else if ch == '.' && !has_dot && !has_exp {
204 has_dot = true;
205 state.advance(1);
206 }
207 else if (ch == 'e' || ch == 'E') && !has_exp {
208 has_exp = true;
209 state.advance(1);
210 if let Some(sign) = state.peek() {
212 if sign == '+' || sign == '-' {
213 state.advance(1);
214 }
215 }
216 }
217 else {
218 break;
219 }
220 }
221
222 let end = state.get_position();
224 let text = state.get_text_in((start..end).into());
225
226 if text == "-" || text == "+" || text == "." {
228 state.set_position(start);
230 return false;
231 }
232
233 state.add_token(JasmSyntaxKind::Number, start, state.get_position());
234 true
235 }
236
237 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
239 let start = state.get_position();
240 let ch = match state.current() {
241 Some(c) => c,
242 None => return false,
243 };
244
245 if !(ch.is_ascii_alphabetic() || ch == '_') {
247 return false;
248 }
249
250 state.advance(1);
251 while let Some(c) = state.current() {
252 if c.is_ascii_alphanumeric() || c == '_' {
253 state.advance(1);
254 }
255 else {
256 break;
257 }
258 }
259
260 let end = state.get_position();
261 let text = state.get_text_in((start..end).into());
262
263 let kind = self.classify_identifier(text);
265 state.add_token(kind, start, state.get_position());
266 true
267 }
268
269 fn classify_identifier(&self, text: &str) -> JasmSyntaxKind {
271 match text {
272 "class" => JasmSyntaxKind::ClassKw,
274 "version" => JasmSyntaxKind::VersionKw,
275 "method" => JasmSyntaxKind::MethodKw,
276 "field" => JasmSyntaxKind::FieldKw,
277 "string" => JasmSyntaxKind::StringKw,
278 "sourcefile" => JasmSyntaxKind::SourceFileKw,
279 "stack" => JasmSyntaxKind::StackKw,
280 "locals" => JasmSyntaxKind::LocalsKw,
281 "end" => JasmSyntaxKind::EndKw,
282 "compiled" => JasmSyntaxKind::CompiledKw,
283 "from" => JasmSyntaxKind::FromKw,
284 "innerclass" => JasmSyntaxKind::InnerClassKw,
285 "nestmembers" => JasmSyntaxKind::NestMembersKw,
286 "bootstrapmethod" => JasmSyntaxKind::BootstrapMethodKw,
287
288 "public" => JasmSyntaxKind::Public,
290 "private" => JasmSyntaxKind::Private,
291 "protected" => JasmSyntaxKind::Protected,
292 "static" => JasmSyntaxKind::Static,
293 "super" => JasmSyntaxKind::Super,
294 "final" => JasmSyntaxKind::Final,
295 "abstract" => JasmSyntaxKind::Abstract,
296 "synchronized" => JasmSyntaxKind::Synchronized,
297 "native" => JasmSyntaxKind::Native,
298 "synthetic" => JasmSyntaxKind::Synthetic,
299 "deprecated" => JasmSyntaxKind::Deprecated,
300 "varargs" => JasmSyntaxKind::Varargs,
301
302 "aload_0" => JasmSyntaxKind::ALoad0,
304 "aload_1" => JasmSyntaxKind::ALoad1,
305 "aload_2" => JasmSyntaxKind::ALoad2,
306 "aload_3" => JasmSyntaxKind::ALoad3,
307 "iload_0" => JasmSyntaxKind::ILoad0,
308 "iload_1" => JasmSyntaxKind::ILoad1,
309 "iload_2" => JasmSyntaxKind::ILoad2,
310 "iload_3" => JasmSyntaxKind::ILoad3,
311 "ldc" => JasmSyntaxKind::Ldc,
312 "ldc_w" => JasmSyntaxKind::LdcW,
313 "ldc2_w" => JasmSyntaxKind::Ldc2W,
314 "invokespecial" => JasmSyntaxKind::InvokeSpecial,
315 "invokevirtual" => JasmSyntaxKind::InvokeVirtual,
316 "invokestatic" => JasmSyntaxKind::InvokeStatic,
317 "invokeinterface" => JasmSyntaxKind::InvokeInterface,
318 "invokedynamic" => JasmSyntaxKind::InvokeDynamic,
319 "getstatic" => JasmSyntaxKind::GetStatic,
320 "putstatic" => JasmSyntaxKind::PutStatic,
321 "getfield" => JasmSyntaxKind::GetField,
322 "putfield" => JasmSyntaxKind::PutField,
323 "return" => JasmSyntaxKind::Return,
324 "ireturn" => JasmSyntaxKind::IReturn,
325 "areturn" => JasmSyntaxKind::AReturn,
326 "lreturn" => JasmSyntaxKind::LReturn,
327 "freturn" => JasmSyntaxKind::FReturn,
328 "dreturn" => JasmSyntaxKind::DReturn,
329 "nop" => JasmSyntaxKind::Nop,
330 "dup" => JasmSyntaxKind::Dup,
331 "pop" => JasmSyntaxKind::Pop,
332 "new" => JasmSyntaxKind::New,
333
334 _ => JasmSyntaxKind::IdentifierToken,
336 }
337 }
338
339 fn lex_punctuation<S: Source>(&self, state: &mut State<S>) -> bool {
341 let start = state.get_position();
342
343 if let Some(ch) = state.current() {
344 let kind = match ch {
345 '{' => JasmSyntaxKind::LeftBrace,
346 '}' => JasmSyntaxKind::RightBrace,
347 '(' => JasmSyntaxKind::LeftParen,
348 ')' => JasmSyntaxKind::RightParen,
349 '[' => JasmSyntaxKind::LeftBracket,
350 ']' => JasmSyntaxKind::RightBracket,
351 ':' => JasmSyntaxKind::Colon,
352 ';' => JasmSyntaxKind::Semicolon,
353 '.' => JasmSyntaxKind::Dot,
354 ',' => JasmSyntaxKind::Comma,
355 '/' => JasmSyntaxKind::Slash,
356 _ => return false,
357 };
358
359 state.advance(ch.len_utf8());
360 state.add_token(kind, start, state.get_position());
361 return true;
362 }
363
364 false
365 }
366}