1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4use crate::{language::MatlabLanguage, lexer::token_type::MatlabTokenType};
5use oak_core::{
6 Lexer, LexerState,
7 lexer::{LexOutput, LexerCache},
8 source::{Source, TextEdit},
9};
10
11type State<'s, S> = LexerState<'s, S, MatlabLanguage>;
12
13#[derive(Clone)]
14pub struct MatlabLexer<'config> {
15 _config: &'config MatlabLanguage,
16}
17
18impl<'config> Lexer<MatlabLanguage> for MatlabLexer<'config> {
19 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MatlabLanguage>) -> LexOutput<MatlabLanguage> {
20 let mut state: State<'_, S> = LexerState::new(source);
21 let result = self.run(&mut state);
22 if result.is_ok() {
23 state.add_eof();
24 }
25 state.finish_with_cache(result, cache)
26 }
27}
28
29impl<'config> MatlabLexer<'config> {
30 pub fn new(config: &'config MatlabLanguage) -> Self {
31 Self { _config: config }
32 }
33
34 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
35 while state.not_at_end() {
36 let safe_point = state.get_position();
37
38 if self.skip_whitespace(state) {
39 continue;
40 }
41
42 if self.lex_newline(state) {
43 continue;
44 }
45
46 if self.lex_comment(state) {
47 continue;
48 }
49
50 if self.lex_string(state) {
51 continue;
52 }
53
54 if self.lex_number(state) {
55 continue;
56 }
57
58 if self.lex_identifier(state) {
59 continue;
60 }
61
62 if self.lex_operator(state) {
63 continue;
64 }
65
66 if self.lex_delimiter(state) {
67 continue;
68 }
69
70 let start_pos = state.get_position();
71 if let Some(ch) = state.peek() {
72 state.advance(ch.len_utf8());
73 state.add_token(MatlabTokenType::Error, start_pos, state.get_position());
74 }
75
76 state.advance_if_dead_lock(safe_point);
77 }
78
79 Ok(())
80 }
81
82 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
83 let start_pos = state.get_position();
84 while let Some(ch) = state.peek() {
85 if ch == ' ' || ch == '\t' {
86 state.advance(ch.len_utf8());
87 }
88 else {
89 break;
90 }
91 }
92 if state.get_position() > start_pos {
93 state.add_token(MatlabTokenType::Whitespace, start_pos, state.get_position());
94 true
95 }
96 else {
97 false
98 }
99 }
100
101 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
102 let start_pos = state.get_position();
103 if state.consume_if_starts_with("\n") || state.consume_if_starts_with("\r\n") || state.consume_if_starts_with("\r") {
104 state.add_token(MatlabTokenType::Newline, start_pos, state.get_position());
105 true
106 }
107 else {
108 false
109 }
110 }
111
112 fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
113 let start_pos = state.get_position();
114 if let Some(ch) = state.peek() {
115 if ch.is_ascii_alphabetic() || ch == '_' {
116 state.advance(ch.len_utf8());
117 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
118
119 let text = state.get_text_in((start_pos..state.get_position()).into());
120 let token_kind = match text.as_ref() {
121 "function" => MatlabTokenType::Function,
122 "end" => MatlabTokenType::End,
123 "if" => MatlabTokenType::If,
124 "else" => MatlabTokenType::Else,
125 "elseif" => MatlabTokenType::Elseif,
126 "while" => MatlabTokenType::While,
127 "for" => MatlabTokenType::For,
128 "break" => MatlabTokenType::Break,
129 "continue" => MatlabTokenType::Continue,
130 "return" => MatlabTokenType::Return,
131 "switch" => MatlabTokenType::Switch,
132 "case" => MatlabTokenType::Case,
133 "otherwise" => MatlabTokenType::Otherwise,
134 "try" => MatlabTokenType::Try,
135 "catch" => MatlabTokenType::Catch,
136 "global" => MatlabTokenType::Global,
137 "persistent" => MatlabTokenType::Persistent,
138 "classdef" => MatlabTokenType::Classdef,
139 "properties" => MatlabTokenType::Properties,
140 "methods" => MatlabTokenType::Methods,
141 "events" => MatlabTokenType::Events,
142 _ => MatlabTokenType::Identifier,
143 };
144
145 state.add_token(token_kind, start_pos, state.get_position());
146 return true;
147 }
148 }
149 false
150 }
151
152 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
153 let start_pos = state.get_position();
154 if let Some(ch) = state.peek() {
155 if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map(|c| c.is_ascii_digit()).unwrap_or(false)) {
156 if ch == '.' {
157 state.advance(1);
158 }
159 state.take_while(|c| c.is_ascii_digit());
160
161 if ch != '.' && state.consume_if_starts_with(".") {
162 state.take_while(|c| c.is_ascii_digit());
163 }
164
165 if state.consume_if_starts_with("e") || state.consume_if_starts_with("E") {
166 if let Some(sign) = state.peek() {
167 if sign == '+' || sign == '-' {
168 state.advance(1);
169 }
170 }
171 state.take_while(|c| c.is_ascii_digit());
172 }
173
174 if state.consume_if_starts_with("i") || state.consume_if_starts_with("j") {
175 }
177
178 state.add_token(MatlabTokenType::Number, start_pos, state.get_position());
179 return true;
180 }
181 }
182 false
183 }
184
185 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
186 let start_pos = state.get_position();
187 if let Some(quote) = state.peek() {
188 if quote == '\'' || quote == '"' {
189 state.advance(1);
190 while let Some(ch) = state.peek() {
191 if ch == quote {
192 state.advance(1);
193 if state.peek() == Some(quote) {
194 state.advance(1);
195 continue;
196 }
197 break;
198 }
199 else if ch == '\\' {
200 state.advance(1);
201 if let Some(next) = state.peek() {
202 state.advance(next.len_utf8());
203 }
204 }
205 else {
206 state.advance(ch.len_utf8());
207 }
208 }
209 let kind = if quote == '\'' { MatlabTokenType::Character } else { MatlabTokenType::String };
210 state.add_token(kind, start_pos, state.get_position());
211 return true;
212 }
213 }
214 false
215 }
216
217 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
218 let start_pos = state.get_position();
219 if state.consume_if_starts_with("%") {
220 if state.consume_if_starts_with("{") {
221 let mut depth = 1;
222 while depth > 0 && state.not_at_end() {
223 if state.starts_with("%{") {
224 depth += 1;
225 state.advance(2);
226 }
227 else if state.starts_with("%}") {
228 depth -= 1;
229 state.advance(2);
230 }
231 else if let Some(ch) = state.current() {
232 state.advance(ch.len_utf8());
233 }
234 }
235 state.add_token(MatlabTokenType::BlockComment, start_pos, state.get_position());
236 }
237 else {
238 state.take_while(|c| c != '\n' && c != '\r');
239 state.add_token(MatlabTokenType::Comment, start_pos, state.get_position());
240 }
241 return true;
242 }
243 false
244 }
245
246 fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
247 let start_pos = state.get_position();
248 let ops = [".*", "./", ".^", ".\\", "==", "~=", "<=", ">=", "&&", "||", "++", "--", ".'"];
249 for op in ops {
250 if state.consume_if_starts_with(op) {
251 state.add_token(MatlabTokenType::Operator, start_pos, state.get_position());
252 return true;
253 }
254 }
255
256 if let Some(ch) = state.peek() {
257 let kind = match ch {
258 '+' | '-' | '*' | '/' | '\\' | '^' | '<' | '>' | '=' | '~' | '&' | '|' | '\'' => MatlabTokenType::Operator,
259 _ => return false,
260 };
261 state.advance(1);
262 state.add_token(kind, start_pos, state.get_position());
263 return true;
264 }
265 false
266 }
267
268 fn lex_delimiter<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
269 let start_pos = state.get_position();
270 if let Some(ch) = state.peek() {
271 let kind = match ch {
272 '(' | ')' | '[' | ']' | '{' | '}' | ';' | ',' | ':' | '?' | '@' | '.' => MatlabTokenType::Delimiter,
273 _ => return false,
274 };
275 state.advance(1);
276 state.add_token(kind, start_pos, state.get_position());
277 true
278 }
279 else {
280 false
281 }
282 }
283}