1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::MatlabLanguage, lexer::token_type::MatlabTokenType};
6use oak_core::{
7 Lexer, LexerState,
8 lexer::{LexOutput, LexerCache},
9 source::{Source, TextEdit},
10};
11
12type State<'s, S> = LexerState<'s, S, MatlabLanguage>;
13
/// Lexer for MATLAB source text.
///
/// The lexer itself is a thin handle: it only stores a borrowed
/// [`MatlabLanguage`] configuration that must outlive it (`'config`).
#[derive(Clone)]
pub struct MatlabLexer<'config> {
    /// Language configuration supplied to [`MatlabLexer::new`].
    config: &'config MatlabLanguage,
}
19
20impl<'config> Lexer<MatlabLanguage> for MatlabLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MatlabLanguage>) -> LexOutput<MatlabLanguage> {
22 let mut state: State<'_, S> = LexerState::new(source);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof();
26 }
27 state.finish_with_cache(result, cache)
28 }
29}
30
31impl<'config> MatlabLexer<'config> {
32 pub fn new(config: &'config MatlabLanguage) -> Self {
34 Self { config }
35 }
36
37 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
38 while state.not_at_end() {
39 let safe_point = state.get_position();
40
41 if self.skip_whitespace(state) {
42 continue;
43 }
44
45 if self.lex_newline(state) {
46 continue;
47 }
48
49 if self.lex_comment(state) {
50 continue;
51 }
52
53 if self.lex_string(state) {
54 continue;
55 }
56
57 if self.lex_number(state) {
58 continue;
59 }
60
61 if self.lex_identifier(state) {
62 continue;
63 }
64
65 if self.lex_operator(state) {
66 continue;
67 }
68
69 if self.lex_delimiter(state) {
70 continue;
71 }
72
73 let start_pos = state.get_position();
74 if let Some(ch) = state.peek() {
75 state.advance(ch.len_utf8());
76 state.add_token(MatlabTokenType::Error, start_pos, state.get_position());
77 }
78
79 state.advance_if_dead_lock(safe_point);
80 }
81
82 Ok(())
83 }
84
85 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
86 let start_pos = state.get_position();
87 while let Some(ch) = state.peek() {
88 if ch == ' ' || ch == '\t' {
89 state.advance(ch.len_utf8());
90 }
91 else {
92 break;
93 }
94 }
95 if state.get_position() > start_pos {
96 state.add_token(MatlabTokenType::Whitespace, start_pos, state.get_position());
97 true
98 }
99 else {
100 false
101 }
102 }
103
104 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
105 let start_pos = state.get_position();
106 if state.consume_if_starts_with("\n") || state.consume_if_starts_with("\r\n") || state.consume_if_starts_with("\r") {
107 state.add_token(MatlabTokenType::Newline, start_pos, state.get_position());
108 true
109 }
110 else {
111 false
112 }
113 }
114
115 fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
116 let start_pos = state.get_position();
117 if let Some(ch) = state.peek() {
118 if ch.is_ascii_alphabetic() || ch == '_' {
119 state.advance(ch.len_utf8());
120 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
121
122 let text = state.get_text_in((start_pos..state.get_position()).into());
123 let token_kind = match text.as_ref() {
124 "function" => MatlabTokenType::Function,
125 "end" => MatlabTokenType::End,
126 "if" => MatlabTokenType::If,
127 "else" => MatlabTokenType::Else,
128 "elseif" => MatlabTokenType::Elseif,
129 "while" => MatlabTokenType::While,
130 "for" => MatlabTokenType::For,
131 "break" => MatlabTokenType::Break,
132 "continue" => MatlabTokenType::Continue,
133 "return" => MatlabTokenType::Return,
134 "switch" => MatlabTokenType::Switch,
135 "case" => MatlabTokenType::Case,
136 "otherwise" => MatlabTokenType::Otherwise,
137 "try" => MatlabTokenType::Try,
138 "catch" => MatlabTokenType::Catch,
139 "global" => MatlabTokenType::Global,
140 "persistent" => MatlabTokenType::Persistent,
141 "classdef" => MatlabTokenType::Classdef,
142 "properties" => MatlabTokenType::Properties,
143 "methods" => MatlabTokenType::Methods,
144 "events" => MatlabTokenType::Events,
145 _ => MatlabTokenType::Identifier,
146 };
147
148 state.add_token(token_kind, start_pos, state.get_position());
149 return true;
150 }
151 }
152 false
153 }
154
155 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
156 let start_pos = state.get_position();
157 if let Some(ch) = state.peek() {
158 if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map(|c| c.is_ascii_digit()).unwrap_or(false)) {
159 if ch == '.' {
160 state.advance(1);
161 }
162 state.take_while(|c| c.is_ascii_digit());
163
164 if ch != '.' && state.consume_if_starts_with(".") {
165 state.take_while(|c| c.is_ascii_digit());
166 }
167
168 if state.consume_if_starts_with("e") || state.consume_if_starts_with("E") {
169 if let Some(sign) = state.peek() {
170 if sign == '+' || sign == '-' {
171 state.advance(1);
172 }
173 }
174 state.take_while(|c| c.is_ascii_digit());
175 }
176
177 if state.consume_if_starts_with("i") || state.consume_if_starts_with("j") {
178 }
180
181 state.add_token(MatlabTokenType::Number, start_pos, state.get_position());
182 return true;
183 }
184 }
185 false
186 }
187
188 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
189 let start_pos = state.get_position();
190 if let Some(quote) = state.peek() {
191 if quote == '\'' || quote == '"' {
192 state.advance(1);
193 while let Some(ch) = state.peek() {
194 if ch == quote {
195 state.advance(1);
196 if state.peek() == Some(quote) {
197 state.advance(1);
198 continue;
199 }
200 break;
201 }
202 else if ch == '\\' {
203 state.advance(1);
204 if let Some(next) = state.peek() {
205 state.advance(next.len_utf8());
206 }
207 }
208 else {
209 state.advance(ch.len_utf8());
210 }
211 }
212 let kind = if quote == '\'' { MatlabTokenType::Character } else { MatlabTokenType::String };
213 state.add_token(kind, start_pos, state.get_position());
214 return true;
215 }
216 }
217 false
218 }
219
220 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
221 let start_pos = state.get_position();
222 if state.consume_if_starts_with("%") {
223 if state.consume_if_starts_with("{") {
224 let mut depth = 1;
225 while depth > 0 && state.not_at_end() {
226 if state.starts_with("%{") {
227 depth += 1;
228 state.advance(2);
229 }
230 else if state.starts_with("%}") {
231 depth -= 1;
232 state.advance(2);
233 }
234 else if let Some(ch) = state.current() {
235 state.advance(ch.len_utf8());
236 }
237 }
238 state.add_token(MatlabTokenType::BlockComment, start_pos, state.get_position());
239 }
240 else {
241 state.take_while(|c| c != '\n' && c != '\r');
242 state.add_token(MatlabTokenType::Comment, start_pos, state.get_position());
243 }
244 return true;
245 }
246 false
247 }
248
249 fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
250 let start_pos = state.get_position();
251 let ops = [".*", "./", ".^", ".\\", "==", "~=", "<=", ">=", "&&", "||", "++", "--", ".'"];
252 for op in ops {
253 if state.consume_if_starts_with(op) {
254 state.add_token(MatlabTokenType::Operator, start_pos, state.get_position());
255 return true;
256 }
257 }
258
259 if let Some(ch) = state.peek() {
260 let kind = match ch {
261 '+' | '-' | '*' | '/' | '\\' | '^' | '<' | '>' | '=' | '~' | '&' | '|' | '\'' => MatlabTokenType::Operator,
262 _ => return false,
263 };
264 state.advance(1);
265 state.add_token(kind, start_pos, state.get_position());
266 return true;
267 }
268 false
269 }
270
271 fn lex_delimiter<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
272 let start_pos = state.get_position();
273 if let Some(ch) = state.peek() {
274 let kind = match ch {
275 '(' | ')' | '[' | ']' | '{' | '}' | ';' | ',' | ':' | '?' | '@' | '.' => MatlabTokenType::Delimiter,
276 _ => return false,
277 };
278 state.advance(1);
279 state.add_token(kind, start_pos, state.get_position());
280 true
281 }
282 else {
283 false
284 }
285 }
286}