1#![doc = include_str!("readme.md")]
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, Source,
4 lexer::{CommentConfig, LexOutput, StringConfig},
5};
6pub mod token_type;
7
8use crate::{language::JasmLanguage, lexer::token_type::JasmTokenType};
9use std::sync::LazyLock;
10
11type State<'a, S> = LexerState<'a, S, JasmLanguage>;
12
13static JASM_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "", block_end: "", nested_blocks: false });
14static JASM_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
15
16#[derive(Clone, Debug)]
17pub struct JasmLexer<'config> {
18 _config: &'config JasmLanguage,
19}
20
21impl<'config> Lexer<JasmLanguage> for JasmLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], _cache: &'a mut impl LexerCache<JasmLanguage>) -> LexOutput<JasmLanguage> {
23 let mut state = State::new(source);
24 let result = self.run(&mut state);
25 state.finish(result)
26 }
27}
28
29impl<'config> JasmLexer<'config> {
30 pub fn new(config: &'config JasmLanguage) -> Self {
31 Self { _config: config }
32 }
33
34 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 let safe_point = state.get_position();
38
39 if self.skip_whitespace(state) {
40 continue;
41 }
42
43 if self.lex_newline(state) {
44 continue;
45 }
46
47 if self.skip_comment(state) {
48 continue;
49 }
50
51 if self.lex_string_literal(state) {
52 continue;
53 }
54
55 if self.lex_number_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_punctuation(state) {
64 continue;
65 }
66
67 state.advance_if_dead_lock(safe_point);
68 }
69
70 state.add_eof();
72 Ok(())
73 }
74
75 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
77 let start = state.get_position();
78
79 while let Some(ch) = state.peek() {
80 if ch == ' ' || ch == '\t' || ch == '\r' {
81 state.advance(ch.len_utf8());
82 }
83 else {
84 break;
85 }
86 }
87
88 if state.get_position() > start {
89 state.add_token(JasmTokenType::Whitespace, start, state.get_position());
90 return true;
91 }
92
93 false
94 }
95
96 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
98 let start = state.get_position();
99
100 if state.current() == Some('\n') {
101 state.advance(1);
102 state.add_token(JasmTokenType::Newline, start, state.get_position());
103 return true;
104 }
105 false
106 }
107
108 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
110 JASM_COMMENT.scan(state, JasmTokenType::Comment, JasmTokenType::Comment)
111 }
112
113 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
115 JASM_STRING.scan(state, JasmTokenType::StringLiteral)
116 }
117
118 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
120 let start = state.get_position();
121 let first = match state.peek() {
122 Some(c) => c,
123 None => return false,
124 };
125
126 if !first.is_ascii_digit() && first != '-' && first != '+' {
128 return false;
129 }
130
131 if first == '-' || first == '+' {
133 if let Some(next) = state.peek_next_n(1) {
134 if !next.is_ascii_digit() {
135 return false;
136 }
137 }
138 else {
139 return false;
140 }
141 }
142
143 state.advance(first.len_utf8());
144 let mut has_dot = false;
145 let mut has_exp = false;
146
147 while let Some(ch) = state.peek() {
148 if ch.is_ascii_digit() {
149 state.advance(ch.len_utf8());
150 }
151 else if ch == '.' && !has_dot && !has_exp {
152 has_dot = true;
153 state.advance(1);
154 }
155 else if (ch == 'e' || ch == 'E') && !has_exp {
156 has_exp = true;
157 state.advance(1);
158 if let Some(sign) = state.peek() {
160 if sign == '+' || sign == '-' {
161 state.advance(1);
162 }
163 }
164 }
165 else {
166 break;
167 }
168 }
169
170 state.add_token(JasmTokenType::Number, start, state.get_position());
171 true
172 }
173
174 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
176 let start = state.get_position();
177 let ch = match state.peek() {
178 Some(c) => c,
179 None => return false,
180 };
181
182 if !(ch.is_ascii_alphabetic() || ch == '_') {
184 return false;
185 }
186
187 state.advance(ch.len_utf8());
188 while let Some(c) = state.peek() {
189 if c.is_ascii_alphanumeric() || c == '_' {
190 state.advance(c.len_utf8());
191 }
192 else {
193 break;
194 }
195 }
196
197 let end = state.get_position();
198 let text = state.get_text_in((start..end).into());
199
200 let kind = self.classify_identifier(&text);
202 state.add_token(kind, start, state.get_position());
203 true
204 }
205
206 fn classify_identifier(&self, text: &str) -> JasmTokenType {
208 match text {
209 "class" => JasmTokenType::ClassKw,
211 "version" => JasmTokenType::VersionKw,
212 "method" => JasmTokenType::MethodKw,
213 "field" => JasmTokenType::FieldKw,
214 "string" => JasmTokenType::StringKw,
215 "sourcefile" => JasmTokenType::SourceFileKw,
216 "stack" => JasmTokenType::StackKw,
217 "locals" => JasmTokenType::LocalsKw,
218 "end" => JasmTokenType::EndKw,
219 "compiled" => JasmTokenType::CompiledKw,
220 "from" => JasmTokenType::FromKw,
221 "innerclass" => JasmTokenType::InnerClassKw,
222 "nestmembers" => JasmTokenType::NestMembersKw,
223 "bootstrapmethod" => JasmTokenType::BootstrapMethodKw,
224
225 "public" => JasmTokenType::Public,
227 "private" => JasmTokenType::Private,
228 "protected" => JasmTokenType::Protected,
229 "static" => JasmTokenType::Static,
230 "super" => JasmTokenType::Super,
231 "final" => JasmTokenType::Final,
232 "abstract" => JasmTokenType::Abstract,
233 "synchronized" => JasmTokenType::Synchronized,
234 "native" => JasmTokenType::Native,
235 "synthetic" => JasmTokenType::Synthetic,
236 "deprecated" => JasmTokenType::Deprecated,
237 "varargs" => JasmTokenType::Varargs,
238
239 "aload_0" => JasmTokenType::ALoad0,
241 "aload_1" => JasmTokenType::ALoad1,
242 "aload_2" => JasmTokenType::ALoad2,
243 "aload_3" => JasmTokenType::ALoad3,
244 "iload_0" => JasmTokenType::ILoad0,
245 "iload_1" => JasmTokenType::ILoad1,
246 "iload_2" => JasmTokenType::ILoad2,
247 "iload_3" => JasmTokenType::ILoad3,
248 "ldc" => JasmTokenType::Ldc,
249 "ldc_w" => JasmTokenType::LdcW,
250 "ldc2_w" => JasmTokenType::Ldc2W,
251 "invokespecial" => JasmTokenType::InvokeSpecial,
252 "invokevirtual" => JasmTokenType::InvokeVirtual,
253 "invokestatic" => JasmTokenType::InvokeStatic,
254 "invokeinterface" => JasmTokenType::InvokeInterface,
255 "invokedynamic" => JasmTokenType::InvokeDynamic,
256 "getstatic" => JasmTokenType::GetStatic,
257 "putstatic" => JasmTokenType::PutStatic,
258 "getfield" => JasmTokenType::GetField,
259 "putfield" => JasmTokenType::PutField,
260 "return" => JasmTokenType::Return,
261 "ireturn" => JasmTokenType::IReturn,
262 "areturn" => JasmTokenType::AReturn,
263 "lreturn" => JasmTokenType::LReturn,
264 "freturn" => JasmTokenType::FReturn,
265 "dreturn" => JasmTokenType::DReturn,
266 "nop" => JasmTokenType::Nop,
267 "dup" => JasmTokenType::Dup,
268 "pop" => JasmTokenType::Pop,
269 "new" => JasmTokenType::New,
270
271 _ => JasmTokenType::IdentifierToken,
273 }
274 }
275
276 fn lex_punctuation<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
278 let start = state.get_position();
279
280 if let Some(ch) = state.current() {
281 let kind = match ch {
282 '{' => JasmTokenType::LeftBrace,
283 '}' => JasmTokenType::RightBrace,
284 '(' => JasmTokenType::LeftParen,
285 ')' => JasmTokenType::RightParen,
286 '[' => JasmTokenType::LeftBracket,
287 ']' => JasmTokenType::RightBracket,
288 ':' => JasmTokenType::Colon,
289 ';' => JasmTokenType::Semicolon,
290 '.' => JasmTokenType::Dot,
291 ',' => JasmTokenType::Comma,
292 '/' => JasmTokenType::Slash,
293 _ => return false,
294 };
295
296 state.advance(ch.len_utf8());
297 state.add_token(kind, start, state.get_position());
298 return true;
299 }
300
301 false
302 }
303}