1use crate::{language::JasmLanguage, syntax::JasmSyntaxKind};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, StringConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, JasmLanguage>;
10
11static JASM_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "", block_end: "", nested_blocks: false });
12static JASM_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
13
/// Lexer for JASM source text.
///
/// The type has no fields, so it carries no state between runs; it can be
/// created with [`Default`] and cloned freely.
#[derive(Debug, Clone, Default)]
pub struct JasmLexer {}
16
17impl Lexer<JasmLanguage> for JasmLexer {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], _cache: &'a mut impl LexerCache<JasmLanguage>) -> LexOutput<JasmLanguage> {
19 let mut state = State::new(source);
20 let result = self.run(&mut state);
21 state.finish(result)
22 }
23}
24
25impl JasmLexer {
26 pub fn new(_config: &JasmLanguage) -> Self {
27 Self {}
28 }
29
30 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
32 while state.not_at_end() {
33 let safe_point = state.get_position();
34
35 if self.skip_whitespace(state) {
36 continue;
37 }
38
39 if self.lex_newline(state) {
40 continue;
41 }
42
43 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_string_literal(state) {
48 continue;
49 }
50
51 if self.lex_number_literal(state) {
52 continue;
53 }
54
55 if self.lex_identifier_or_keyword(state) {
56 continue;
57 }
58
59 if self.lex_punctuation(state) {
60 continue;
61 }
62
63 state.advance_if_dead_lock(safe_point);
64 }
65
66 state.add_eof();
68 Ok(())
69 }
70
71 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
73 let start = state.get_position();
74
75 while let Some(ch) = state.peek() {
76 if ch == ' ' || ch == '\t' || ch == '\r' {
77 state.advance(ch.len_utf8());
78 }
79 else {
80 break;
81 }
82 }
83
84 if state.get_position() > start {
85 state.add_token(JasmSyntaxKind::Whitespace, start, state.get_position());
86 return true;
87 }
88
89 false
90 }
91
92 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
94 let start = state.get_position();
95
96 if state.current() == Some('\n') {
97 state.advance(1);
98 state.add_token(JasmSyntaxKind::Newline, start, state.get_position());
99 return true;
100 }
101 false
102 }
103
104 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
106 JASM_COMMENT.scan(state, JasmSyntaxKind::Comment, JasmSyntaxKind::Comment)
107 }
108
109 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
111 JASM_STRING.scan(state, JasmSyntaxKind::StringLiteral)
112 }
113
114 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
116 let start = state.get_position();
117 let first = match state.peek() {
118 Some(c) => c,
119 None => return false,
120 };
121
122 if !first.is_ascii_digit() && first != '-' && first != '+' {
124 return false;
125 }
126
127 if first == '-' || first == '+' {
129 if let Some(next) = state.peek_next_n(1) {
130 if !next.is_ascii_digit() {
131 return false;
132 }
133 }
134 else {
135 return false;
136 }
137 }
138
139 state.advance(first.len_utf8());
140 let mut has_dot = false;
141 let mut has_exp = false;
142
143 while let Some(ch) = state.peek() {
144 if ch.is_ascii_digit() {
145 state.advance(ch.len_utf8());
146 }
147 else if ch == '.' && !has_dot && !has_exp {
148 has_dot = true;
149 state.advance(1);
150 }
151 else if (ch == 'e' || ch == 'E') && !has_exp {
152 has_exp = true;
153 state.advance(1);
154 if let Some(sign) = state.peek() {
156 if sign == '+' || sign == '-' {
157 state.advance(1);
158 }
159 }
160 }
161 else {
162 break;
163 }
164 }
165
166 state.add_token(JasmSyntaxKind::Number, start, state.get_position());
167 true
168 }
169
170 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
172 let start = state.get_position();
173 let ch = match state.peek() {
174 Some(c) => c,
175 None => return false,
176 };
177
178 if !(ch.is_ascii_alphabetic() || ch == '_') {
180 return false;
181 }
182
183 state.advance(ch.len_utf8());
184 while let Some(c) = state.peek() {
185 if c.is_ascii_alphanumeric() || c == '_' {
186 state.advance(c.len_utf8());
187 }
188 else {
189 break;
190 }
191 }
192
193 let end = state.get_position();
194 let text = state.get_text_in((start..end).into());
195
196 let kind = self.classify_identifier(&text);
198 state.add_token(kind, start, state.get_position());
199 true
200 }
201
202 fn classify_identifier(&self, text: &str) -> JasmSyntaxKind {
204 match text {
205 "class" => JasmSyntaxKind::ClassKw,
207 "version" => JasmSyntaxKind::VersionKw,
208 "method" => JasmSyntaxKind::MethodKw,
209 "field" => JasmSyntaxKind::FieldKw,
210 "string" => JasmSyntaxKind::StringKw,
211 "sourcefile" => JasmSyntaxKind::SourceFileKw,
212 "stack" => JasmSyntaxKind::StackKw,
213 "locals" => JasmSyntaxKind::LocalsKw,
214 "end" => JasmSyntaxKind::EndKw,
215 "compiled" => JasmSyntaxKind::CompiledKw,
216 "from" => JasmSyntaxKind::FromKw,
217 "innerclass" => JasmSyntaxKind::InnerClassKw,
218 "nestmembers" => JasmSyntaxKind::NestMembersKw,
219 "bootstrapmethod" => JasmSyntaxKind::BootstrapMethodKw,
220
221 "public" => JasmSyntaxKind::Public,
223 "private" => JasmSyntaxKind::Private,
224 "protected" => JasmSyntaxKind::Protected,
225 "static" => JasmSyntaxKind::Static,
226 "super" => JasmSyntaxKind::Super,
227 "final" => JasmSyntaxKind::Final,
228 "abstract" => JasmSyntaxKind::Abstract,
229 "synchronized" => JasmSyntaxKind::Synchronized,
230 "native" => JasmSyntaxKind::Native,
231 "synthetic" => JasmSyntaxKind::Synthetic,
232 "deprecated" => JasmSyntaxKind::Deprecated,
233 "varargs" => JasmSyntaxKind::Varargs,
234
235 "aload_0" => JasmSyntaxKind::ALoad0,
237 "aload_1" => JasmSyntaxKind::ALoad1,
238 "aload_2" => JasmSyntaxKind::ALoad2,
239 "aload_3" => JasmSyntaxKind::ALoad3,
240 "iload_0" => JasmSyntaxKind::ILoad0,
241 "iload_1" => JasmSyntaxKind::ILoad1,
242 "iload_2" => JasmSyntaxKind::ILoad2,
243 "iload_3" => JasmSyntaxKind::ILoad3,
244 "ldc" => JasmSyntaxKind::Ldc,
245 "ldc_w" => JasmSyntaxKind::LdcW,
246 "ldc2_w" => JasmSyntaxKind::Ldc2W,
247 "invokespecial" => JasmSyntaxKind::InvokeSpecial,
248 "invokevirtual" => JasmSyntaxKind::InvokeVirtual,
249 "invokestatic" => JasmSyntaxKind::InvokeStatic,
250 "invokeinterface" => JasmSyntaxKind::InvokeInterface,
251 "invokedynamic" => JasmSyntaxKind::InvokeDynamic,
252 "getstatic" => JasmSyntaxKind::GetStatic,
253 "putstatic" => JasmSyntaxKind::PutStatic,
254 "getfield" => JasmSyntaxKind::GetField,
255 "putfield" => JasmSyntaxKind::PutField,
256 "return" => JasmSyntaxKind::Return,
257 "ireturn" => JasmSyntaxKind::IReturn,
258 "areturn" => JasmSyntaxKind::AReturn,
259 "lreturn" => JasmSyntaxKind::LReturn,
260 "freturn" => JasmSyntaxKind::FReturn,
261 "dreturn" => JasmSyntaxKind::DReturn,
262 "nop" => JasmSyntaxKind::Nop,
263 "dup" => JasmSyntaxKind::Dup,
264 "pop" => JasmSyntaxKind::Pop,
265 "new" => JasmSyntaxKind::New,
266
267 _ => JasmSyntaxKind::IdentifierToken,
269 }
270 }
271
272 fn lex_punctuation<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
274 let start = state.get_position();
275
276 if let Some(ch) = state.current() {
277 let kind = match ch {
278 '{' => JasmSyntaxKind::LeftBrace,
279 '}' => JasmSyntaxKind::RightBrace,
280 '(' => JasmSyntaxKind::LeftParen,
281 ')' => JasmSyntaxKind::RightParen,
282 '[' => JasmSyntaxKind::LeftBracket,
283 ']' => JasmSyntaxKind::RightBracket,
284 ':' => JasmSyntaxKind::Colon,
285 ';' => JasmSyntaxKind::Semicolon,
286 '.' => JasmSyntaxKind::Dot,
287 ',' => JasmSyntaxKind::Comma,
288 '/' => JasmSyntaxKind::Slash,
289 _ => return false,
290 };
291
292 state.advance(ch.len_utf8());
293 state.add_token(kind, start, state.get_position());
294 return true;
295 }
296
297 false
298 }
299}