1use crate::{kind::MsilSyntaxKind, language::MsilLanguage};
2use oak_core::{
3 IncrementalCache, Lexer,
4 lexer::{LexOutput, LexerState},
5 source::Source,
6};
7
8#[derive(Clone)]
9pub struct MsilLexer<'config> {
10 config: &'config MsilLanguage,
11}
12
13impl<'config> MsilLexer<'config> {
14 pub fn new(config: &'config MsilLanguage) -> Self {
15 Self { config }
16 }
17
18 fn skip_whitespace<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
20 let start_pos = state.get_position();
21
22 while let Some(ch) = state.peek() {
23 if ch == ' ' || ch == '\t' {
24 state.advance(ch.len_utf8());
25 }
26 else {
27 break;
28 }
29 }
30
31 if state.get_position() > start_pos {
32 state.add_token(MsilSyntaxKind::Whitespace, start_pos, state.get_position());
33 true
34 }
35 else {
36 false
37 }
38 }
39
40 fn lex_newline<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
42 let start_pos = state.get_position();
43
44 if let Some('\n') = state.peek() {
45 state.advance(1);
46 state.add_token(MsilSyntaxKind::Whitespace, start_pos, state.get_position());
47 true
48 }
49 else if let Some('\r') = state.peek() {
50 state.advance(1);
51 if let Some('\n') = state.peek() {
52 state.advance(1);
53 }
54 state.add_token(MsilSyntaxKind::Whitespace, start_pos, state.get_position());
55 true
56 }
57 else {
58 false
59 }
60 }
61
62 fn lex_comment<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
64 let start_pos = state.get_position();
65
66 if let Some('/') = state.peek() {
67 if let Some('/') = state.peek_next_n(1) {
68 state.advance(2);
70 while let Some(ch) = state.peek() {
71 if ch == '\n' || ch == '\r' {
72 break;
73 }
74 state.advance(ch.len_utf8());
75 }
76 state.add_token(MsilSyntaxKind::CommentToken, start_pos, state.get_position());
77 return true;
78 }
79 }
80
81 false
82 }
83
84 fn lex_identifier<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
86 let start_pos = state.get_position();
87
88 if let Some(ch) = state.peek() {
89 if !ch.is_ascii_alphabetic() && ch != '_' && ch != '.' {
90 return false;
91 }
92
93 while let Some(ch) = state.peek() {
95 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '.' {
96 state.advance(ch.len_utf8());
97 }
98 else {
99 break;
100 }
101 }
102
103 let text = state.get_text_in((start_pos..state.get_position()).into());
105 let token_kind = match text {
106 ".assembly" => MsilSyntaxKind::AssemblyKeyword,
107 "extern" => MsilSyntaxKind::ExternKeyword,
108 ".module" => MsilSyntaxKind::ModuleKeyword,
109 ".class" => MsilSyntaxKind::ClassKeyword,
110 ".method" => MsilSyntaxKind::MethodKeyword,
111 "public" => MsilSyntaxKind::PublicKeyword,
112 "private" => MsilSyntaxKind::PrivateKeyword,
113 "static" => MsilSyntaxKind::StaticKeyword,
114 _ => MsilSyntaxKind::IdentifierToken,
115 };
116
117 state.add_token(token_kind, start_pos, state.get_position());
118 true
119 }
120 else {
121 false
122 }
123 }
124
125 fn lex_number<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
127 let start_pos = state.get_position();
128
129 if let Some(ch) = state.peek() {
130 if !ch.is_ascii_digit() {
131 return false;
132 }
133
134 while let Some(ch) = state.peek() {
136 if ch.is_ascii_digit() {
137 state.advance(ch.len_utf8());
138 }
139 else {
140 break;
141 }
142 }
143
144 if let Some('.') = state.peek() {
146 if let Some(next_ch) = state.peek_next_n(1) {
147 if next_ch.is_ascii_digit() {
148 state.advance(1); while let Some(ch) = state.peek() {
150 if ch.is_ascii_digit() {
151 state.advance(ch.len_utf8());
152 }
153 else {
154 break;
155 }
156 }
157 }
158 }
159 }
160
161 state.add_token(MsilSyntaxKind::NumberToken, start_pos, state.get_position());
162 true
163 }
164 else {
165 false
166 }
167 }
168
169 fn lex_string<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
171 let start_pos = state.get_position();
172
173 if let Some('"') = state.peek() {
174 state.advance(1); while let Some(ch) = state.peek() {
177 if ch == '"' {
178 state.advance(1); break;
180 }
181 else if ch == '\\' {
182 state.advance(1); if let Some(_) = state.peek() {
184 state.advance(1); }
186 }
187 else {
188 state.advance(ch.len_utf8());
189 }
190 }
191
192 state.add_token(MsilSyntaxKind::StringToken, start_pos, state.get_position());
193 true
194 }
195 else {
196 false
197 }
198 }
199
200 fn lex_delimiter<S: Source>(&self, state: &mut LexerState<S, MsilLanguage>) -> bool {
202 let start_pos = state.get_position();
203
204 if let Some(ch) = state.peek() {
205 let token_kind = match ch {
206 '{' => MsilSyntaxKind::LeftBrace,
207 '}' => MsilSyntaxKind::RightBrace,
208 '(' => MsilSyntaxKind::LeftParen,
209 ')' => MsilSyntaxKind::RightParen,
210 '[' => MsilSyntaxKind::LeftBracket,
211 ']' => MsilSyntaxKind::RightBracket,
212 '.' => MsilSyntaxKind::Dot,
213 ':' => MsilSyntaxKind::Colon,
214 ';' => MsilSyntaxKind::Semicolon,
215 ',' => MsilSyntaxKind::Comma,
216 _ => return false,
217 };
218
219 state.advance(ch.len_utf8());
220 state.add_token(token_kind, start_pos, state.get_position());
221 true
222 }
223 else {
224 false
225 }
226 }
227}
228
229impl<'config> Lexer<MsilLanguage> for MsilLexer<'config> {
230 fn lex_incremental(
231 &self,
232 source: impl Source,
233 changed: usize,
234 cache: IncrementalCache<MsilLanguage>,
235 ) -> LexOutput<MsilLanguage> {
236 let mut state = LexerState::new_with_cache(source, changed, cache);
237
238 while state.not_at_end() {
239 if self.skip_whitespace(&mut state) {
241 continue;
242 }
243
244 if self.lex_newline(&mut state) {
245 continue;
246 }
247
248 if self.lex_comment(&mut state) {
249 continue;
250 }
251
252 if self.lex_string(&mut state) {
253 continue;
254 }
255
256 if self.lex_number(&mut state) {
257 continue;
258 }
259
260 if self.lex_identifier(&mut state) {
261 continue;
262 }
263
264 if self.lex_delimiter(&mut state) {
265 continue;
266 }
267
268 let start_pos = state.get_position();
270 if let Some(ch) = state.peek() {
271 state.advance(ch.len_utf8());
272 state.add_token(MsilSyntaxKind::Error, start_pos, state.get_position());
273 }
274 }
275
276 let eof_pos = state.get_position();
278 state.add_token(MsilSyntaxKind::Eof, eof_pos, eof_pos);
279
280 state.finish(Ok(()))
281 }
282}