1#![doc = include_str!("readme.md")]
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, Source,
4 lexer::{CommentConfig, LexOutput, StringConfig},
5};
6pub mod token_type;
8
9use crate::{language::JasmLanguage, lexer::token_type::JasmTokenType};
10use std::sync::LazyLock;
11
/// Shorthand for the `oak_core` lexer state specialised to [`JasmLanguage`].
///
/// `'a` and `S` are forwarded unchanged to [`LexerState`].
pub(crate) type State<'a, S> = LexerState<'a, S, JasmLanguage>;
13
/// Lazily-initialised comment scanning configuration: `//` is the line-comment marker,
/// both block markers are empty strings, and nested blocks are turned off.
// NOTE(review): presumably the empty block markers disable block comments — confirm against `CommentConfig::scan`.
static JASM_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "", block_end: "", nested_blocks: false });
/// Lazily-initialised string scanning configuration: `"` is the only quote character
/// and `\` is the escape character.
static JASM_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
16
/// Lexer for JASM source text.
///
/// Borrows a [`JasmLanguage`] for the lifetime `'config`; its `comments` and `extended`
/// flags are consulted while scanning.
#[derive(Clone, Debug)]
pub struct JasmLexer<'config> {
    /// Borrowed language configuration read by the scanning methods.
    config: &'config JasmLanguage,
}
22
23impl<'config> Lexer<JasmLanguage> for JasmLexer<'config> {
24 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], _cache: &'a mut impl LexerCache<JasmLanguage>) -> LexOutput<JasmLanguage> {
25 let mut state = State::new(source);
26 let result = self.run(&mut state);
27 state.finish(result)
28 }
29}
30
31impl<'config> JasmLexer<'config> {
32 pub fn new(config: &'config JasmLanguage) -> Self {
34 Self { config }
35 }
36
37 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.lex_newline(state) {
47 continue;
48 }
49
50 if self.skip_comment(state) {
51 continue;
52 }
53
54 if self.lex_string_literal(state) {
55 continue;
56 }
57
58 if self.lex_number_literal(state) {
59 continue;
60 }
61
62 if self.lex_identifier_or_keyword(state) {
63 continue;
64 }
65
66 if self.lex_punctuation(state) {
67 continue;
68 }
69
70 state.advance_if_dead_lock(safe_point);
71 }
72
73 state.add_eof();
75 Ok(())
76 }
77
78 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
80 let start = state.get_position();
81
82 while let Some(ch) = state.peek() {
83 if ch == ' ' || ch == '\t' || ch == '\r' {
84 state.advance(ch.len_utf8());
85 }
86 else {
87 break;
88 }
89 }
90
91 if state.get_position() > start {
92 state.add_token(JasmTokenType::Whitespace, start, state.get_position());
93 return true;
94 }
95
96 false
97 }
98
99 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
101 let start = state.get_position();
102
103 if state.current() == Some('\n') {
104 state.advance(1);
105 state.add_token(JasmTokenType::Newline, start, state.get_position());
106 return true;
107 }
108 false
109 }
110
111 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
113 if !self.config.comments {
114 return false;
115 }
116 JASM_COMMENT.scan(state, JasmTokenType::Comment, JasmTokenType::Comment)
117 }
118
119 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
121 JASM_STRING.scan(state, JasmTokenType::String)
122 }
123
124 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
126 let start = state.get_position();
127 let first = match state.peek() {
128 Some(c) => c,
129 None => return false,
130 };
131
132 if !first.is_ascii_digit() && first != '-' && first != '+' {
134 return false;
135 }
136
137 if first == '-' || first == '+' {
139 if let Some(next) = state.peek_next_n(1) {
140 if !next.is_ascii_digit() {
141 return false;
142 }
143 }
144 else {
145 return false;
146 }
147 }
148
149 state.advance(first.len_utf8());
150 let mut has_dot = false;
151 let mut has_exp = false;
152
153 while let Some(ch) = state.peek() {
154 if ch.is_ascii_digit() {
155 state.advance(ch.len_utf8());
156 }
157 else if ch == '.' && !has_dot && !has_exp {
158 has_dot = true;
159 state.advance(1);
160 }
161 else if (ch == 'e' || ch == 'E') && !has_exp {
162 has_exp = true;
163 state.advance(1);
164 if let Some(sign) = state.peek() {
166 if sign == '+' || sign == '-' {
167 state.advance(1);
168 }
169 }
170 }
171 else {
172 break;
173 }
174 }
175
176 state.add_token(JasmTokenType::Number, start, state.get_position());
177 true
178 }
179
180 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
182 let start = state.get_position();
183 let ch = match state.peek() {
184 Some(c) => c,
185 None => return false,
186 };
187
188 if !(ch.is_ascii_alphabetic() || ch == '_') {
190 return false;
191 }
192
193 state.advance(ch.len_utf8());
194 while let Some(c) = state.peek() {
195 if c.is_ascii_alphanumeric() || c == '_' {
196 state.advance(c.len_utf8());
197 }
198 else {
199 break;
200 }
201 }
202
203 let end = state.get_position();
204 let text = state.get_text_in((start..end).into());
205
206 let kind = self.classify_identifier(&text);
208 state.add_token(kind, start, state.get_position());
209 true
210 }
211
212 fn classify_identifier(&self, text: &str) -> JasmTokenType {
214 match text {
215 "class" => JasmTokenType::ClassKw,
217 "version" => JasmTokenType::VersionKw,
218 "method" => JasmTokenType::MethodKw,
219 "field" => JasmTokenType::FieldKw,
220 "string" => JasmTokenType::StringKw,
221 "source" => JasmTokenType::SourceKw,
222 "sourcefile" => JasmTokenType::SourceFileKw,
223 "stack" => JasmTokenType::StackKw,
224 "locals" => JasmTokenType::LocalsKw,
225 "end" => JasmTokenType::EndKw,
226 "compiled" => JasmTokenType::CompiledKw,
227 "from" => JasmTokenType::FromKw,
228 "innerclass" => JasmTokenType::InnerClassKw,
229 "nestmembers" => JasmTokenType::NestMembersKw,
230 "bootstrapmethod" => JasmTokenType::BootstrapMethodKw,
231 "interface" => JasmTokenType::InterfaceKw,
232 "extends" => JasmTokenType::ExtendsKw,
233 "implements" => JasmTokenType::ImplementsKw,
234 "catch" => JasmTokenType::CatchKw,
235 "attribute" => JasmTokenType::AttributeKw,
236 "stackmap" => JasmTokenType::StackMapKw,
237
238 "public" => JasmTokenType::Public,
240 "private" => JasmTokenType::Private,
241 "protected" => JasmTokenType::Protected,
242 "static" => JasmTokenType::Static,
243 "super" => JasmTokenType::Super,
244 "final" => JasmTokenType::Final,
245 "abstract" => JasmTokenType::Abstract,
246 "synchronized" => JasmTokenType::Synchronized,
247 "native" => JasmTokenType::Native,
248 "synthetic" => JasmTokenType::Synthetic,
249 "deprecated" => JasmTokenType::Deprecated,
250 "varargs" => JasmTokenType::Varargs,
251
252 "aload_0" => JasmTokenType::ALoad0,
254 "aload_1" => JasmTokenType::ALoad1,
255 "aload_2" => JasmTokenType::ALoad2,
256 "aload_3" => JasmTokenType::ALoad3,
257 "iload_0" => JasmTokenType::ILoad0,
258 "iload_1" => JasmTokenType::ILoad1,
259 "iload_2" => JasmTokenType::ILoad2,
260 "iload_3" => JasmTokenType::ILoad3,
261 "ldc" => JasmTokenType::Ldc,
262 "ldc_w" => JasmTokenType::LdcW,
263 "ldc2_w" => JasmTokenType::Ldc2W,
264 "invokespecial" => JasmTokenType::InvokeSpecial,
265 "invokevirtual" => JasmTokenType::InvokeVirtual,
266 "invokestatic" => JasmTokenType::InvokeStatic,
267 "getstatic" => JasmTokenType::GetStatic,
268 "putstatic" => JasmTokenType::PutStatic,
269 "getfield" => JasmTokenType::GetField,
270 "putfield" => JasmTokenType::PutField,
271 "return" => JasmTokenType::Return,
272 "ireturn" => JasmTokenType::IReturn,
273 "areturn" => JasmTokenType::AReturn,
274 "lreturn" => JasmTokenType::LReturn,
275 "freturn" => JasmTokenType::FReturn,
276 "dreturn" => JasmTokenType::DReturn,
277 "nop" => JasmTokenType::Nop,
278 "dup" => JasmTokenType::Dup,
279 "pop" => JasmTokenType::Pop,
280 "new" => JasmTokenType::New,
281
282 _ if self.config.extended => match text {
284 "invokeinterface" => JasmTokenType::InvokeInterface,
285 "invokedynamic" => JasmTokenType::InvokeDynamic,
286 "checkcast" => JasmTokenType::CheckCast,
287 "instanceof" => JasmTokenType::InstanceOf,
288 "newarray" => JasmTokenType::NewArray,
289 "anewarray" => JasmTokenType::ANewArray,
290 "arraylength" => JasmTokenType::ArrayLength,
291 "athrow" => JasmTokenType::AThrow,
292 "monitorenter" => JasmTokenType::MonitorEnter,
293 "monitorexit" => JasmTokenType::MonitorExit,
294 "multianewarray" => JasmTokenType::MultiANewArray,
295 "ifnull" => JasmTokenType::IfNull,
296 "ifnonnull" => JasmTokenType::IfNonNull,
297 "goto" => JasmTokenType::Goto,
298 "goto_w" => JasmTokenType::GotoW,
299 "jsr" => JasmTokenType::Jsr,
300 "jsr_w" => JasmTokenType::JsrW,
301 "ret" => JasmTokenType::Ret,
302 "tableswitch" => JasmTokenType::TableSwitch,
303 "lookupswitch" => JasmTokenType::LookupSwitch,
304 "bipush" => JasmTokenType::BiPush,
305 "sipush" => JasmTokenType::SiPush,
306 "iinc" => JasmTokenType::IInc,
307 "wide" => JasmTokenType::Wide,
308 "breakpoint" => JasmTokenType::BreakPoint,
309 "impdep1" => JasmTokenType::ImpDep1,
310 "impdep2" => JasmTokenType::ImpDep2,
311 _ => JasmTokenType::Identifier,
312 },
313
314 _ => JasmTokenType::Identifier,
316 }
317 }
318
319 fn lex_punctuation<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
321 let start = state.get_position();
322
323 if let Some(ch) = state.current() {
324 let kind = match ch {
325 '{' => JasmTokenType::LeftBrace,
326 '}' => JasmTokenType::RightBrace,
327 '(' => JasmTokenType::LeftParen,
328 ')' => JasmTokenType::RightParen,
329 '[' => JasmTokenType::LeftBracket,
330 ']' => JasmTokenType::RightBracket,
331 ':' => JasmTokenType::Colon,
332 ';' => JasmTokenType::Semicolon,
333 '.' => JasmTokenType::Dot,
334 ',' => JasmTokenType::Comma,
335 '/' => JasmTokenType::Slash,
336 '@' => JasmTokenType::At,
337 _ => return false,
338 };
339
340 state.advance(ch.len_utf8());
341 state.add_token(kind, start, state.get_position());
342 return true;
343 }
344
345 false
346 }
347}