1pub mod token_type;
2pub use token_type::MojoTokenType;
3
4use crate::MojoLanguage;
5use oak_core::{
6 Lexer, LexerCache, LexerState, OakError,
7 lexer::LexOutput,
8 source::{Source, TextEdit},
9};
10
11type State<'a, S> = LexerState<'a, S, MojoLanguage>;
12
/// Lexer for Mojo source text.
///
/// The type carries no fields, so every instance is interchangeable; all
/// per-run state (indentation stack, bracket depth) lives in local variables
/// of the lexing routine.
#[derive(Debug, Clone, Default)]
pub struct MojoLexer {}
16
17impl Lexer<MojoLanguage> for MojoLexer {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MojoLanguage>) -> LexOutput<MojoLanguage> {
19 let mut state = State::new_with_cache(source, 0, cache);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof();
23 }
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl MojoLexer {
29 pub fn new() -> Self {
31 Self {}
32 }
33
34 pub(crate) fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
35 let mut indent_stack = vec![0];
36 let mut bracket_level: usize = 0;
37 let mut at_line_start = true;
38
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if at_line_start && bracket_level == 0 {
43 self.handle_indentation(state, &mut indent_stack);
44 at_line_start = false;
45 continue;
46 }
47
48 if let Some(ch) = state.current() {
49 match ch {
50 ' ' | '\t' => {
51 self.skip_whitespace(state);
52 }
53 '\n' | '\r' => {
54 self.lex_newline(state, bracket_level);
55 at_line_start = true;
56 }
57 '#' => {
58 self.lex_comment(state);
59 }
60 '"' | '\'' => {
61 self.lex_string(state);
62 }
63 '0'..='9' => {
64 self.lex_number(state);
65 }
66 'a'..='z' | 'A'..='Z' | '_' => {
67 self.lex_identifier_or_keyword(state);
68 }
69 '(' | '[' | '{' => {
70 bracket_level += 1;
71 self.lex_delimiter(state);
72 }
73 ')' | ']' | '}' => {
74 bracket_level = bracket_level.saturating_sub(1);
75 self.lex_delimiter(state);
76 }
77 '+' | '-' | '*' | '/' | '%' | '=' | '<' | '>' | '!' | '&' | '|' => {
78 self.lex_operator(state);
79 }
80 ',' | ':' | ';' | '.' => {
81 self.lex_delimiter(state);
82 }
83 _ => {
84 state.advance(ch.len_utf8());
85 state.add_token(MojoTokenType::Error, safe_point, state.get_position())
86 }
87 }
88 }
89
90 state.advance_if_dead_lock(safe_point)
91 }
92
93 while indent_stack.len() > 1 {
95 indent_stack.pop();
96 let pos = state.get_position();
97 state.add_token(MojoTokenType::Dedent, pos, pos)
98 }
99
100 Ok(())
101 }
102
103 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
104 let start_pos = state.get_position();
105 while let Some(ch) = state.current() {
106 if ch == ' ' || ch == '\t' {
107 state.advance(ch.len_utf8())
108 }
109 else {
110 break;
111 }
112 }
113 if state.get_position() > start_pos {
114 state.add_token(MojoTokenType::Whitespace, start_pos, state.get_position());
115 }
116 }
117
118 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, bracket_level: usize) {
119 let start_pos = state.get_position();
120 let kind = if bracket_level > 0 { MojoTokenType::Whitespace } else { MojoTokenType::Newline };
121
122 if let Some('\n') = state.current() {
123 state.advance(1);
124 state.add_token(kind, start_pos, state.get_position());
125 }
126 else if let Some('\r') = state.current() {
127 state.advance(1);
128 if let Some('\n') = state.current() {
129 state.advance(1);
130 }
131 state.add_token(kind, start_pos, state.get_position());
132 }
133 }
134
135 fn lex_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
136 let start_pos = state.get_position();
137 state.advance(1); while let Some(ch) = state.current() {
139 if ch == '\n' || ch == '\r' {
140 break;
141 }
142 state.advance(ch.len_utf8())
143 }
144 state.add_token(MojoTokenType::Comment, start_pos, state.get_position());
145 }
146
147 fn lex_string<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
148 let start_pos = state.get_position();
149 let quote = state.current().unwrap();
150 state.advance(1);
151 let mut escaped = false;
152 while let Some(ch) = state.current() {
153 if escaped {
154 escaped = false;
155 state.advance(ch.len_utf8());
156 continue;
157 }
158 if ch == '\\' {
159 escaped = true;
160 state.advance(1);
161 continue;
162 }
163 if ch == quote {
164 state.advance(1);
165 break;
166 }
167 state.advance(ch.len_utf8());
168 }
169 state.add_token(MojoTokenType::String, start_pos, state.get_position());
170 }
171
172 fn lex_number<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
173 let start_pos = state.get_position();
174 let mut is_float = false;
175 while let Some(ch) = state.current() {
176 if ch.is_ascii_digit() {
177 state.advance(1);
178 }
179 else if ch == '.' && !is_float {
180 is_float = true;
181 state.advance(1);
182 }
183 else {
184 break;
185 }
186 }
187 let kind = if is_float { MojoTokenType::Float } else { MojoTokenType::Integer };
188 state.add_token(kind, start_pos, state.get_position());
189 }
190
191 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
192 let start_pos = state.get_position();
193 while let Some(ch) = state.current() {
194 if ch.is_alphanumeric() || ch == '_' {
195 state.advance(ch.len_utf8());
196 }
197 else {
198 break;
199 }
200 }
201 let text = state.get_text_in(oak_core::Range { start: start_pos, end: state.get_position() });
202 let kind = match text.as_ref() {
203 "fn" => MojoTokenType::Fn,
204 "struct" => MojoTokenType::Struct,
205 "var" => MojoTokenType::Var,
206 "let" => MojoTokenType::Let,
207 "if" => MojoTokenType::If,
208 "else" => MojoTokenType::Else,
209 "while" => MojoTokenType::While,
210 "for" => MojoTokenType::For,
211 "in" => MojoTokenType::In,
212 "return" => MojoTokenType::Return,
213 "break" => MojoTokenType::Break,
214 "continue" => MojoTokenType::Continue,
215 "import" => MojoTokenType::Import,
216 "from" => MojoTokenType::From,
217 "True" => MojoTokenType::True,
218 "False" => MojoTokenType::False,
219 "None" => MojoTokenType::None,
220 _ => MojoTokenType::Identifier,
221 };
222 state.add_token(kind, start_pos, state.get_position());
223 }
224
225 fn lex_operator<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
226 let start_pos = state.get_position();
227 let ch = state.current().unwrap();
228 state.advance(1);
229 let kind = match ch {
230 '+' => MojoTokenType::Plus,
231 '-' => {
232 if let Some('>') = state.current() {
233 state.advance(1);
234 MojoTokenType::Arrow
235 }
236 else {
237 MojoTokenType::Minus
238 }
239 }
240 '*' => MojoTokenType::Star,
241 '/' => MojoTokenType::Slash,
242 '%' => MojoTokenType::Percent,
243 '=' => {
244 if let Some('=') = state.current() {
245 state.advance(1);
246 MojoTokenType::EqualEqual
247 }
248 else {
249 MojoTokenType::Equal
250 }
251 }
252 '<' => {
253 if let Some('=') = state.current() {
254 state.advance(1);
255 MojoTokenType::LessEqual
256 }
257 else {
258 MojoTokenType::Less
259 }
260 }
261 '>' => {
262 if let Some('=') = state.current() {
263 state.advance(1);
264 MojoTokenType::GreaterEqual
265 }
266 else {
267 MojoTokenType::Greater
268 }
269 }
270 '!' => {
271 if let Some('=') = state.current() {
272 state.advance(1);
273 MojoTokenType::NotEqual
274 }
275 else {
276 MojoTokenType::Error
277 }
278 }
279 _ => MojoTokenType::Error,
280 };
281 state.add_token(kind, start_pos, state.get_position());
282 }
283
284 fn lex_delimiter<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
285 let start_pos = state.get_position();
286 let ch = state.current().unwrap();
287 state.advance(1);
288 let kind = match ch {
289 '(' => MojoTokenType::LeftParen,
290 ')' => MojoTokenType::RightParen,
291 '[' => MojoTokenType::LeftBracket,
292 ']' => MojoTokenType::RightBracket,
293 '{' => MojoTokenType::LeftBrace,
294 '}' => MojoTokenType::RightBrace,
295 ',' => MojoTokenType::Comma,
296 ':' => MojoTokenType::Colon,
297 ';' => MojoTokenType::Semicolon,
298 '.' => MojoTokenType::Dot,
299 _ => MojoTokenType::Error,
300 };
301 state.add_token(kind, start_pos, state.get_position());
302 }
303
304 fn handle_indentation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, stack: &mut Vec<usize>) {
305 let start_pos = state.get_position();
306 let mut indent = 0;
307 let mut temp_pos = start_pos;
308
309 while let Some(ch) = state.get_char_at(temp_pos) {
310 if ch == ' ' {
311 indent += 1;
312 }
313 else if ch == '\t' {
314 indent += 4; }
316 else {
317 break;
318 }
319 temp_pos += ch.len_utf8();
320 }
321
322 match state.get_char_at(temp_pos) {
323 Some('\n') | Some('\r') | Some('#') => {
324 return;
326 }
327 None => return, _ => {}
329 }
330
331 state.advance(temp_pos - start_pos);
332 if state.get_position() > start_pos {
333 state.add_token(MojoTokenType::Whitespace, start_pos, state.get_position());
334 }
335
336 let last_indent = *stack.last().unwrap();
337 if indent > last_indent {
338 stack.push(indent);
339 state.add_token(MojoTokenType::Indent, state.get_position(), state.get_position());
340 }
341 else {
342 while indent < *stack.last().unwrap() {
343 stack.pop();
344 state.add_token(MojoTokenType::Dedent, state.get_position(), state.get_position());
345 }
346 }
347 }
348}