1pub mod token_type;
3pub use token_type::MojoTokenType;
4
5use crate::MojoLanguage;
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError,
8 lexer::LexOutput,
9 source::{Source, TextEdit},
10};
11
/// Shorthand for the `oak_core` lexer state specialised to [`MojoLanguage`].
///
/// `'a` and `S` are forwarded unchanged to [`LexerState`]; every lexing
/// routine in this module takes a `&mut State<'a, S>`.
pub(crate) type State<'a, S> = LexerState<'a, S, MojoLanguage>;
13
/// Lexer for Mojo source text.
///
/// Borrows a [`MojoLanguage`] configuration for the lifetime `'config`;
/// cloning the lexer only copies that reference.
#[derive(Clone)]
pub struct MojoLexer<'config> {
    /// Language configuration this lexer was constructed with.
    config: &'config MojoLanguage,
}
19
20impl<'config> Lexer<MojoLanguage> for MojoLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MojoLanguage>) -> LexOutput<MojoLanguage> {
22 let mut state = State::new_with_cache(source, 0, cache);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof();
26 }
27 state.finish_with_cache(result, cache)
28 }
29}
30
31impl<'config> MojoLexer<'config> {
32 pub fn new(config: &'config MojoLanguage) -> Self {
34 Self { config }
35 }
36
37 pub(crate) fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
38 let mut indent_stack = vec![0];
39 let mut bracket_level: usize = 0;
40 let mut at_line_start = true;
41
42 while state.not_at_end() {
43 let safe_point = state.get_position();
44
45 if at_line_start && bracket_level == 0 {
46 self.handle_indentation(state, &mut indent_stack);
47 at_line_start = false;
48 continue;
49 }
50
51 if let Some(ch) = state.current() {
52 match ch {
53 ' ' | '\t' => {
54 self.skip_whitespace(state);
55 }
56 '\n' | '\r' => {
57 self.lex_newline(state, bracket_level);
58 at_line_start = true;
59 }
60 '#' => {
61 self.lex_comment(state);
62 }
63 '"' | '\'' => {
64 self.lex_string(state);
65 }
66 '0'..='9' => {
67 self.lex_number(state);
68 }
69 'a'..='z' | 'A'..='Z' | '_' => {
70 self.lex_identifier_or_keyword(state);
71 }
72 '(' | '[' | '{' => {
73 bracket_level += 1;
74 self.lex_delimiter(state);
75 }
76 ')' | ']' | '}' => {
77 bracket_level = bracket_level.saturating_sub(1);
78 self.lex_delimiter(state);
79 }
80 '+' | '-' | '*' | '/' | '%' | '=' | '<' | '>' | '!' | '&' | '|' => {
81 self.lex_operator(state);
82 }
83 ',' | ':' | ';' | '.' => {
84 self.lex_delimiter(state);
85 }
86 _ => {
87 state.advance(ch.len_utf8());
88 state.add_token(MojoTokenType::Error, safe_point, state.get_position())
89 }
90 }
91 }
92
93 state.advance_if_dead_lock(safe_point)
94 }
95
96 while indent_stack.len() > 1 {
98 indent_stack.pop();
99 let pos = state.get_position();
100 state.add_token(MojoTokenType::Dedent, pos, pos)
101 }
102
103 Ok(())
104 }
105
106 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
107 let start_pos = state.get_position();
108 while let Some(ch) = state.current() {
109 if ch == ' ' || ch == '\t' {
110 state.advance(ch.len_utf8())
111 }
112 else {
113 break;
114 }
115 }
116 if state.get_position() > start_pos {
117 state.add_token(MojoTokenType::Whitespace, start_pos, state.get_position());
118 }
119 }
120
121 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, bracket_level: usize) {
122 let start_pos = state.get_position();
123 let kind = if bracket_level > 0 { MojoTokenType::Whitespace } else { MojoTokenType::Newline };
124
125 if let Some('\n') = state.current() {
126 state.advance(1);
127 state.add_token(kind, start_pos, state.get_position());
128 }
129 else if let Some('\r') = state.current() {
130 state.advance(1);
131 if let Some('\n') = state.current() {
132 state.advance(1);
133 }
134 state.add_token(kind, start_pos, state.get_position());
135 }
136 }
137
138 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
139 let start_pos = state.get_position();
140 state.advance(1); while let Some(ch) = state.current() {
142 if ch == '\n' || ch == '\r' {
143 break;
144 }
145 state.advance(ch.len_utf8())
146 }
147 state.add_token(MojoTokenType::Comment, start_pos, state.get_position());
148 }
149
150 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
151 let start_pos = state.get_position();
152 let quote = state.current().unwrap();
153 state.advance(1);
154 let mut escaped = false;
155 while let Some(ch) = state.current() {
156 if escaped {
157 escaped = false;
158 state.advance(ch.len_utf8());
159 continue;
160 }
161 if ch == '\\' {
162 escaped = true;
163 state.advance(1);
164 continue;
165 }
166 if ch == quote {
167 state.advance(1);
168 break;
169 }
170 state.advance(ch.len_utf8());
171 }
172 state.add_token(MojoTokenType::String, start_pos, state.get_position());
173 }
174
175 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
176 let start_pos = state.get_position();
177 let mut is_float = false;
178 while let Some(ch) = state.current() {
179 if ch.is_ascii_digit() {
180 state.advance(1);
181 }
182 else if ch == '.' && !is_float {
183 is_float = true;
184 state.advance(1);
185 }
186 else {
187 break;
188 }
189 }
190 let kind = if is_float { MojoTokenType::Float } else { MojoTokenType::Integer };
191 state.add_token(kind, start_pos, state.get_position());
192 }
193
194 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
195 let start_pos = state.get_position();
196 while let Some(ch) = state.current() {
197 if ch.is_alphanumeric() || ch == '_' {
198 state.advance(ch.len_utf8());
199 }
200 else {
201 break;
202 }
203 }
204 let text = state.get_text_in(oak_core::Range { start: start_pos, end: state.get_position() });
205 let kind = match text.as_ref() {
206 "fn" => MojoTokenType::Fn,
207 "struct" => MojoTokenType::Struct,
208 "var" => MojoTokenType::Var,
209 "let" => MojoTokenType::Let,
210 "if" => MojoTokenType::If,
211 "else" => MojoTokenType::Else,
212 "while" => MojoTokenType::While,
213 "for" => MojoTokenType::For,
214 "in" => MojoTokenType::In,
215 "return" => MojoTokenType::Return,
216 "break" => MojoTokenType::Break,
217 "continue" => MojoTokenType::Continue,
218 "import" => MojoTokenType::Import,
219 "from" => MojoTokenType::From,
220 "True" => MojoTokenType::True,
221 "False" => MojoTokenType::False,
222 "None" => MojoTokenType::None,
223 _ => MojoTokenType::Identifier,
224 };
225 state.add_token(kind, start_pos, state.get_position());
226 }
227
228 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
229 let start_pos = state.get_position();
230 let ch = state.current().unwrap();
231 state.advance(1);
232 let kind = match ch {
233 '+' => MojoTokenType::Plus,
234 '-' => {
235 if let Some('>') = state.current() {
236 state.advance(1);
237 MojoTokenType::Arrow
238 }
239 else {
240 MojoTokenType::Minus
241 }
242 }
243 '*' => MojoTokenType::Star,
244 '/' => MojoTokenType::Slash,
245 '%' => MojoTokenType::Percent,
246 '=' => {
247 if let Some('=') = state.current() {
248 state.advance(1);
249 MojoTokenType::EqualEqual
250 }
251 else {
252 MojoTokenType::Equal
253 }
254 }
255 '<' => {
256 if let Some('=') = state.current() {
257 state.advance(1);
258 MojoTokenType::LessEqual
259 }
260 else {
261 MojoTokenType::Less
262 }
263 }
264 '>' => {
265 if let Some('=') = state.current() {
266 state.advance(1);
267 MojoTokenType::GreaterEqual
268 }
269 else {
270 MojoTokenType::Greater
271 }
272 }
273 '!' => {
274 if let Some('=') = state.current() {
275 state.advance(1);
276 MojoTokenType::NotEqual
277 }
278 else {
279 MojoTokenType::Error
280 }
281 }
282 _ => MojoTokenType::Error,
283 };
284 state.add_token(kind, start_pos, state.get_position());
285 }
286
287 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
288 let start_pos = state.get_position();
289 let ch = state.current().unwrap();
290 state.advance(1);
291 let kind = match ch {
292 '(' => MojoTokenType::LeftParen,
293 ')' => MojoTokenType::RightParen,
294 '[' => MojoTokenType::LeftBracket,
295 ']' => MojoTokenType::RightBracket,
296 '{' => MojoTokenType::LeftBrace,
297 '}' => MojoTokenType::RightBrace,
298 ',' => MojoTokenType::Comma,
299 ':' => MojoTokenType::Colon,
300 ';' => MojoTokenType::Semicolon,
301 '.' => MojoTokenType::Dot,
302 _ => MojoTokenType::Error,
303 };
304 state.add_token(kind, start_pos, state.get_position());
305 }
306
307 fn handle_indentation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, stack: &mut Vec<usize>) {
308 let start_pos = state.get_position();
309 let mut indent = 0;
310 let mut temp_pos = start_pos;
311
312 while let Some(ch) = state.get_char_at(temp_pos) {
313 if ch == ' ' {
314 indent += 1;
315 }
316 else if ch == '\t' {
317 indent += 4; }
319 else {
320 break;
321 }
322 temp_pos += ch.len_utf8();
323 }
324
325 match state.get_char_at(temp_pos) {
326 Some('\n') | Some('\r') | Some('#') => {
327 return;
329 }
330 None => return, _ => {}
332 }
333
334 state.advance(temp_pos - start_pos);
335 if state.get_position() > start_pos {
336 state.add_token(MojoTokenType::Whitespace, start_pos, state.get_position());
337 }
338
339 let last_indent = *stack.last().unwrap();
340 if indent > last_indent {
341 stack.push(indent);
342 state.add_token(MojoTokenType::Indent, state.get_position(), state.get_position());
343 }
344 else {
345 while indent < *stack.last().unwrap() {
346 stack.pop();
347 state.add_token(MojoTokenType::Dedent, state.get_position(), state.get_position());
348 }
349 }
350 }
351}