1#![doc = include_str!("readme.md")]
4use oak_core::{
5 Lexer, LexerCache, LexerState, OakError, Source,
6 lexer::{CommentConfig, LexOutput, StringConfig},
7};
8pub mod token_type;
10
11use crate::{language::JasmLanguage, lexer::token_type::JasmTokenType};
12use std::sync::LazyLock;
13
14pub(crate) type State<'a, S> = LexerState<'a, S, JasmLanguage>;
15
16static JASM_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "", block_end: "", nested_blocks: false });
17static JASM_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
19#[derive(Clone, Debug)]
21pub struct JasmLexer<'config> {
22 config: &'config JasmLanguage,
23}
24
25impl<'config> Lexer<JasmLanguage> for JasmLexer<'config> {
26 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::TextEdit], _cache: &'a mut impl LexerCache<JasmLanguage>) -> LexOutput<JasmLanguage> {
27 let mut state = State::new(source);
28 let result = self.run(&mut state);
29 state.finish(result)
30 }
31}
32
33impl<'config> JasmLexer<'config> {
34 pub fn new(config: &'config JasmLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.lex_newline(state) {
49 continue;
50 }
51
52 if self.skip_comment(state) {
53 continue;
54 }
55
56 if self.lex_string_literal(state) {
57 continue;
58 }
59
60 if self.lex_number_literal(state) {
61 continue;
62 }
63
64 if self.lex_identifier_or_keyword(state) {
65 continue;
66 }
67
68 if self.lex_punctuation(state) {
69 continue;
70 }
71
72 state.advance_if_dead_lock(safe_point);
73 }
74
75 state.add_eof();
77 Ok(())
78 }
79
80 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
82 let start = state.get_position();
83
84 while let Some(ch) = state.peek() {
85 if ch == ' ' || ch == '\t' || ch == '\r' {
86 state.advance(ch.len_utf8());
87 }
88 else {
89 break;
90 }
91 }
92
93 if state.get_position() > start {
94 state.add_token(JasmTokenType::Whitespace, start, state.get_position());
95 return true;
96 }
97
98 false
99 }
100
101 fn lex_newline<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
103 let start = state.get_position();
104
105 if state.current() == Some('\n') {
106 state.advance(1);
107 state.add_token(JasmTokenType::Newline, start, state.get_position());
108 return true;
109 }
110 false
111 }
112
113 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
115 JASM_COMMENT.scan(state, JasmTokenType::Comment, JasmTokenType::Comment)
116 }
117
118 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
120 JASM_STRING.scan(state, JasmTokenType::String)
121 }
122
123 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
125 let start = state.get_position();
126 let first = match state.peek() {
127 Some(c) => c,
128 None => return false,
129 };
130
131 if !first.is_ascii_digit() && first != '-' && first != '+' {
133 return false;
134 }
135
136 if first == '-' || first == '+' {
138 if let Some(next) = state.peek_next_n(1) {
139 if !next.is_ascii_digit() {
140 return false;
141 }
142 }
143 else {
144 return false;
145 }
146 }
147
148 state.advance(first.len_utf8());
149 let mut has_dot = false;
150 let mut has_exp = false;
151
152 while let Some(ch) = state.peek() {
153 if ch.is_ascii_digit() {
154 state.advance(ch.len_utf8());
155 }
156 else if ch == '.' && !has_dot && !has_exp {
157 has_dot = true;
158 state.advance(1);
159 }
160 else if (ch == 'e' || ch == 'E') && !has_exp {
161 has_exp = true;
162 state.advance(1);
163 if let Some(sign) = state.peek() {
165 if sign == '+' || sign == '-' {
166 state.advance(1);
167 }
168 }
169 }
170 else {
171 break;
172 }
173 }
174
175 state.add_token(JasmTokenType::Number, start, state.get_position());
176 true
177 }
178
179 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
181 let start = state.get_position();
182 let ch = match state.peek() {
183 Some(c) => c,
184 None => return false,
185 };
186
187 if !(ch.is_ascii_alphabetic() || ch == '_') {
189 return false;
190 }
191
192 state.advance(ch.len_utf8());
193 while let Some(c) = state.peek() {
194 if c.is_ascii_alphanumeric() || c == '_' {
195 state.advance(c.len_utf8());
196 }
197 else {
198 break;
199 }
200 }
201
202 let end = state.get_position();
203 let text = state.get_text_in((start..end).into());
204
205 let kind = self.classify_identifier(&text);
207 state.add_token(kind, start, state.get_position());
208 true
209 }
210
211 fn classify_identifier(&self, text: &str) -> JasmTokenType {
213 match text {
214 "class" => JasmTokenType::ClassKw,
216 "version" => JasmTokenType::VersionKw,
217 "method" => JasmTokenType::MethodKw,
218 "field" => JasmTokenType::FieldKw,
219 "string" => JasmTokenType::StringKw,
220 "sourcefile" => JasmTokenType::SourceFileKw,
221 "stack" => JasmTokenType::StackKw,
222 "locals" => JasmTokenType::LocalsKw,
223 "end" => JasmTokenType::EndKw,
224 "compiled" => JasmTokenType::CompiledKw,
225 "from" => JasmTokenType::FromKw,
226 "innerclass" => JasmTokenType::InnerClassKw,
227 "nestmembers" => JasmTokenType::NestMembersKw,
228 "bootstrapmethod" => JasmTokenType::BootstrapMethodKw,
229
230 "public" => JasmTokenType::Public,
232 "private" => JasmTokenType::Private,
233 "protected" => JasmTokenType::Protected,
234 "static" => JasmTokenType::Static,
235 "super" => JasmTokenType::Super,
236 "final" => JasmTokenType::Final,
237 "abstract" => JasmTokenType::Abstract,
238 "synchronized" => JasmTokenType::Synchronized,
239 "native" => JasmTokenType::Native,
240 "synthetic" => JasmTokenType::Synthetic,
241 "deprecated" => JasmTokenType::Deprecated,
242 "varargs" => JasmTokenType::Varargs,
243
244 "aload_0" => JasmTokenType::ALoad0,
246 "aload_1" => JasmTokenType::ALoad1,
247 "aload_2" => JasmTokenType::ALoad2,
248 "aload_3" => JasmTokenType::ALoad3,
249 "iload_0" => JasmTokenType::ILoad0,
250 "iload_1" => JasmTokenType::ILoad1,
251 "iload_2" => JasmTokenType::ILoad2,
252 "iload_3" => JasmTokenType::ILoad3,
253 "ldc" => JasmTokenType::Ldc,
254 "ldc_w" => JasmTokenType::LdcW,
255 "ldc2_w" => JasmTokenType::Ldc2W,
256 "invokespecial" => JasmTokenType::InvokeSpecial,
257 "invokevirtual" => JasmTokenType::InvokeVirtual,
258 "invokestatic" => JasmTokenType::InvokeStatic,
259 "invokeinterface" => JasmTokenType::InvokeInterface,
260 "invokedynamic" => JasmTokenType::InvokeDynamic,
261 "getstatic" => JasmTokenType::GetStatic,
262 "putstatic" => JasmTokenType::PutStatic,
263 "getfield" => JasmTokenType::GetField,
264 "putfield" => JasmTokenType::PutField,
265 "return" => JasmTokenType::Return,
266 "ireturn" => JasmTokenType::IReturn,
267 "areturn" => JasmTokenType::AReturn,
268 "lreturn" => JasmTokenType::LReturn,
269 "freturn" => JasmTokenType::FReturn,
270 "dreturn" => JasmTokenType::DReturn,
271 "nop" => JasmTokenType::Nop,
272 "dup" => JasmTokenType::Dup,
273 "pop" => JasmTokenType::Pop,
274 "new" => JasmTokenType::New,
275
276 _ => JasmTokenType::Identifier,
278 }
279 }
280
281 fn lex_punctuation<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
283 let start = state.get_position();
284
285 if let Some(ch) = state.current() {
286 let kind = match ch {
287 '{' => JasmTokenType::LeftBrace,
288 '}' => JasmTokenType::RightBrace,
289 '(' => JasmTokenType::LeftParen,
290 ')' => JasmTokenType::RightParen,
291 '[' => JasmTokenType::LeftBracket,
292 ']' => JasmTokenType::RightBracket,
293 ':' => JasmTokenType::Colon,
294 ';' => JasmTokenType::Semicolon,
295 '.' => JasmTokenType::Dot,
296 ',' => JasmTokenType::Comma,
297 '/' => JasmTokenType::Slash,
298 _ => return false,
299 };
300
301 state.advance(ch.len_utf8());
302 state.add_token(kind, start, state.get_position());
303 return true;
304 }
305
306 false
307 }
308}