1use crate::{language::JasmLanguage, syntax::JasmSyntaxKind};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, StringConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'a, S> = LexerState<'a, S, JasmLanguage>;
10
11static JASM_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "", block_end: "", nested_blocks: false });
12static JASM_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
13
14#[derive(Clone, Debug)]
15pub struct JasmLexer<'config> {
16 _config: &'config JasmLanguage,
17}
18
19impl<'config> Lexer<JasmLanguage> for JasmLexer<'config> {
20 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], _cache: &'a mut impl LexerCache<JasmLanguage>) -> LexOutput<JasmLanguage> {
21 let mut state = State::new(source);
22 let result = self.run(&mut state);
23 state.finish(result)
24 }
25}
26
27impl<'config> JasmLexer<'config> {
28 pub fn new(config: &'config JasmLanguage) -> Self {
29 Self { _config: config }
30 }
31
32 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
34 while state.not_at_end() {
35 let safe_point = state.get_position();
36
37 if self.skip_whitespace(state) {
38 continue;
39 }
40
41 if self.lex_newline(state) {
42 continue;
43 }
44
45 if self.skip_comment(state) {
46 continue;
47 }
48
49 if self.lex_string_literal(state) {
50 continue;
51 }
52
53 if self.lex_number_literal(state) {
54 continue;
55 }
56
57 if self.lex_identifier_or_keyword(state) {
58 continue;
59 }
60
61 if self.lex_punctuation(state) {
62 continue;
63 }
64
65 state.advance_if_dead_lock(safe_point);
66 }
67
68 state.add_eof();
70 Ok(())
71 }
72
73 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
75 let start = state.get_position();
76
77 while let Some(ch) = state.peek() {
78 if ch == ' ' || ch == '\t' || ch == '\r' {
79 state.advance(ch.len_utf8());
80 }
81 else {
82 break;
83 }
84 }
85
86 if state.get_position() > start {
87 state.add_token(JasmSyntaxKind::Whitespace, start, state.get_position());
88 return true;
89 }
90
91 false
92 }
93
94 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
96 let start = state.get_position();
97
98 if state.current() == Some('\n') {
99 state.advance(1);
100 state.add_token(JasmSyntaxKind::Newline, start, state.get_position());
101 return true;
102 }
103 false
104 }
105
106 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
108 JASM_COMMENT.scan(state, JasmSyntaxKind::Comment, JasmSyntaxKind::Comment)
109 }
110
111 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
113 JASM_STRING.scan(state, JasmSyntaxKind::StringLiteral)
114 }
115
116 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
118 let start = state.get_position();
119 let first = match state.peek() {
120 Some(c) => c,
121 None => return false,
122 };
123
124 if !first.is_ascii_digit() && first != '-' && first != '+' {
126 return false;
127 }
128
129 if first == '-' || first == '+' {
131 if let Some(next) = state.peek_next_n(1) {
132 if !next.is_ascii_digit() {
133 return false;
134 }
135 }
136 else {
137 return false;
138 }
139 }
140
141 state.advance(first.len_utf8());
142 let mut has_dot = false;
143 let mut has_exp = false;
144
145 while let Some(ch) = state.peek() {
146 if ch.is_ascii_digit() {
147 state.advance(ch.len_utf8());
148 }
149 else if ch == '.' && !has_dot && !has_exp {
150 has_dot = true;
151 state.advance(1);
152 }
153 else if (ch == 'e' || ch == 'E') && !has_exp {
154 has_exp = true;
155 state.advance(1);
156 if let Some(sign) = state.peek() {
158 if sign == '+' || sign == '-' {
159 state.advance(1);
160 }
161 }
162 }
163 else {
164 break;
165 }
166 }
167
168 state.add_token(JasmSyntaxKind::Number, start, state.get_position());
169 true
170 }
171
172 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
174 let start = state.get_position();
175 let ch = match state.peek() {
176 Some(c) => c,
177 None => return false,
178 };
179
180 if !(ch.is_ascii_alphabetic() || ch == '_') {
182 return false;
183 }
184
185 state.advance(ch.len_utf8());
186 while let Some(c) = state.peek() {
187 if c.is_ascii_alphanumeric() || c == '_' {
188 state.advance(c.len_utf8());
189 }
190 else {
191 break;
192 }
193 }
194
195 let end = state.get_position();
196 let text = state.get_text_in((start..end).into());
197
198 let kind = self.classify_identifier(&text);
200 state.add_token(kind, start, state.get_position());
201 true
202 }
203
204 fn classify_identifier(&self, text: &str) -> JasmSyntaxKind {
206 match text {
207 "class" => JasmSyntaxKind::ClassKw,
209 "version" => JasmSyntaxKind::VersionKw,
210 "method" => JasmSyntaxKind::MethodKw,
211 "field" => JasmSyntaxKind::FieldKw,
212 "string" => JasmSyntaxKind::StringKw,
213 "sourcefile" => JasmSyntaxKind::SourceFileKw,
214 "stack" => JasmSyntaxKind::StackKw,
215 "locals" => JasmSyntaxKind::LocalsKw,
216 "end" => JasmSyntaxKind::EndKw,
217 "compiled" => JasmSyntaxKind::CompiledKw,
218 "from" => JasmSyntaxKind::FromKw,
219 "innerclass" => JasmSyntaxKind::InnerClassKw,
220 "nestmembers" => JasmSyntaxKind::NestMembersKw,
221 "bootstrapmethod" => JasmSyntaxKind::BootstrapMethodKw,
222
223 "public" => JasmSyntaxKind::Public,
225 "private" => JasmSyntaxKind::Private,
226 "protected" => JasmSyntaxKind::Protected,
227 "static" => JasmSyntaxKind::Static,
228 "super" => JasmSyntaxKind::Super,
229 "final" => JasmSyntaxKind::Final,
230 "abstract" => JasmSyntaxKind::Abstract,
231 "synchronized" => JasmSyntaxKind::Synchronized,
232 "native" => JasmSyntaxKind::Native,
233 "synthetic" => JasmSyntaxKind::Synthetic,
234 "deprecated" => JasmSyntaxKind::Deprecated,
235 "varargs" => JasmSyntaxKind::Varargs,
236
237 "aload_0" => JasmSyntaxKind::ALoad0,
239 "aload_1" => JasmSyntaxKind::ALoad1,
240 "aload_2" => JasmSyntaxKind::ALoad2,
241 "aload_3" => JasmSyntaxKind::ALoad3,
242 "iload_0" => JasmSyntaxKind::ILoad0,
243 "iload_1" => JasmSyntaxKind::ILoad1,
244 "iload_2" => JasmSyntaxKind::ILoad2,
245 "iload_3" => JasmSyntaxKind::ILoad3,
246 "ldc" => JasmSyntaxKind::Ldc,
247 "ldc_w" => JasmSyntaxKind::LdcW,
248 "ldc2_w" => JasmSyntaxKind::Ldc2W,
249 "invokespecial" => JasmSyntaxKind::InvokeSpecial,
250 "invokevirtual" => JasmSyntaxKind::InvokeVirtual,
251 "invokestatic" => JasmSyntaxKind::InvokeStatic,
252 "invokeinterface" => JasmSyntaxKind::InvokeInterface,
253 "invokedynamic" => JasmSyntaxKind::InvokeDynamic,
254 "getstatic" => JasmSyntaxKind::GetStatic,
255 "putstatic" => JasmSyntaxKind::PutStatic,
256 "getfield" => JasmSyntaxKind::GetField,
257 "putfield" => JasmSyntaxKind::PutField,
258 "return" => JasmSyntaxKind::Return,
259 "ireturn" => JasmSyntaxKind::IReturn,
260 "areturn" => JasmSyntaxKind::AReturn,
261 "lreturn" => JasmSyntaxKind::LReturn,
262 "freturn" => JasmSyntaxKind::FReturn,
263 "dreturn" => JasmSyntaxKind::DReturn,
264 "nop" => JasmSyntaxKind::Nop,
265 "dup" => JasmSyntaxKind::Dup,
266 "pop" => JasmSyntaxKind::Pop,
267 "new" => JasmSyntaxKind::New,
268
269 _ => JasmSyntaxKind::IdentifierToken,
271 }
272 }
273
274 fn lex_punctuation<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
276 let start = state.get_position();
277
278 if let Some(ch) = state.current() {
279 let kind = match ch {
280 '{' => JasmSyntaxKind::LeftBrace,
281 '}' => JasmSyntaxKind::RightBrace,
282 '(' => JasmSyntaxKind::LeftParen,
283 ')' => JasmSyntaxKind::RightParen,
284 '[' => JasmSyntaxKind::LeftBracket,
285 ']' => JasmSyntaxKind::RightBracket,
286 ':' => JasmSyntaxKind::Colon,
287 ';' => JasmSyntaxKind::Semicolon,
288 '.' => JasmSyntaxKind::Dot,
289 ',' => JasmSyntaxKind::Comma,
290 '/' => JasmSyntaxKind::Slash,
291 _ => return false,
292 };
293
294 state.advance(ch.len_utf8());
295 state.add_token(kind, start, state.get_position());
296 return true;
297 }
298
299 false
300 }
301}