1use crate::{kind::MatlabSyntaxKind, language::MatlabLanguage};
2use oak_core::{
3 Lexer, LexerState,
4 lexer::{LexOutput, LexerCache},
5 source::{Source, TextEdit},
6};
7
8type State<'s, S> = LexerState<'s, S, MatlabLanguage>;
9
10#[derive(Clone)]
11pub struct MatlabLexer<'config> {
12 _config: &'config MatlabLanguage,
13}
14
15impl<'config> Lexer<MatlabLanguage> for MatlabLexer<'config> {
16 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MatlabLanguage>) -> LexOutput<MatlabLanguage> {
17 let mut state: State<'_, S> = LexerState::new(source);
18 let result = self.run(&mut state);
19 if result.is_ok() {
20 state.add_eof();
21 }
22 state.finish_with_cache(result, cache)
23 }
24}
25
26impl<'config> MatlabLexer<'config> {
27 pub fn new(config: &'config MatlabLanguage) -> Self {
28 Self { _config: config }
29 }
30
31 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
32 while state.not_at_end() {
33 let safe_point = state.get_position();
34
35 if self.skip_whitespace(state) {
36 continue;
37 }
38
39 if self.lex_newline(state) {
40 continue;
41 }
42
43 if self.lex_comment(state) {
44 continue;
45 }
46
47 if self.lex_string(state) {
48 continue;
49 }
50
51 if self.lex_number(state) {
52 continue;
53 }
54
55 if self.lex_identifier(state) {
56 continue;
57 }
58
59 if self.lex_operator(state) {
60 continue;
61 }
62
63 if self.lex_delimiter(state) {
64 continue;
65 }
66
67 let start_pos = state.get_position();
68 if let Some(ch) = state.peek() {
69 state.advance(ch.len_utf8());
70 state.add_token(MatlabSyntaxKind::Error, start_pos, state.get_position());
71 }
72
73 state.advance_if_dead_lock(safe_point);
74 }
75
76 Ok(())
77 }
78
79 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
80 let start_pos = state.get_position();
81 while let Some(ch) = state.peek() {
82 if ch == ' ' || ch == '\t' {
83 state.advance(ch.len_utf8());
84 }
85 else {
86 break;
87 }
88 }
89 if state.get_position() > start_pos {
90 state.add_token(MatlabSyntaxKind::Whitespace, start_pos, state.get_position());
91 true
92 }
93 else {
94 false
95 }
96 }
97
98 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
99 let start_pos = state.get_position();
100 if state.consume_if_starts_with("\n") || state.consume_if_starts_with("\r\n") || state.consume_if_starts_with("\r") {
101 state.add_token(MatlabSyntaxKind::Newline, start_pos, state.get_position());
102 true
103 }
104 else {
105 false
106 }
107 }
108
109 fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
110 let start_pos = state.get_position();
111 if let Some(ch) = state.peek() {
112 if ch.is_ascii_alphabetic() || ch == '_' {
113 state.advance(ch.len_utf8());
114 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
115
116 let text = state.get_text_in((start_pos..state.get_position()).into());
117 let token_kind = match text.as_ref() {
118 "function" => MatlabSyntaxKind::Function,
119 "end" => MatlabSyntaxKind::End,
120 "if" => MatlabSyntaxKind::If,
121 "else" => MatlabSyntaxKind::Else,
122 "elseif" => MatlabSyntaxKind::Elseif,
123 "while" => MatlabSyntaxKind::While,
124 "for" => MatlabSyntaxKind::For,
125 "break" => MatlabSyntaxKind::Break,
126 "continue" => MatlabSyntaxKind::Continue,
127 "return" => MatlabSyntaxKind::Return,
128 "switch" => MatlabSyntaxKind::Switch,
129 "case" => MatlabSyntaxKind::Case,
130 "otherwise" => MatlabSyntaxKind::Otherwise,
131 "try" => MatlabSyntaxKind::Try,
132 "catch" => MatlabSyntaxKind::Catch,
133 "global" => MatlabSyntaxKind::Global,
134 "persistent" => MatlabSyntaxKind::Persistent,
135 "classdef" => MatlabSyntaxKind::Classdef,
136 "properties" => MatlabSyntaxKind::Properties,
137 "methods" => MatlabSyntaxKind::Methods,
138 "events" => MatlabSyntaxKind::Events,
139 _ => MatlabSyntaxKind::Identifier,
140 };
141
142 state.add_token(token_kind, start_pos, state.get_position());
143 return true;
144 }
145 }
146 false
147 }
148
149 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
150 let start_pos = state.get_position();
151 if let Some(ch) = state.peek() {
152 if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map(|c| c.is_ascii_digit()).unwrap_or(false)) {
153 if ch == '.' {
154 state.advance(1);
155 }
156 state.take_while(|c| c.is_ascii_digit());
157
158 if ch != '.' && state.consume_if_starts_with(".") {
159 state.take_while(|c| c.is_ascii_digit());
160 }
161
162 if state.consume_if_starts_with("e") || state.consume_if_starts_with("E") {
163 if let Some(sign) = state.peek() {
164 if sign == '+' || sign == '-' {
165 state.advance(1);
166 }
167 }
168 state.take_while(|c| c.is_ascii_digit());
169 }
170
171 if state.consume_if_starts_with("i") || state.consume_if_starts_with("j") {
172 }
174
175 state.add_token(MatlabSyntaxKind::Number, start_pos, state.get_position());
176 return true;
177 }
178 }
179 false
180 }
181
182 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
183 let start_pos = state.get_position();
184 if let Some(quote) = state.peek() {
185 if quote == '\'' || quote == '"' {
186 state.advance(1);
187 while let Some(ch) = state.peek() {
188 if ch == quote {
189 state.advance(1);
190 if state.peek() == Some(quote) {
191 state.advance(1);
192 continue;
193 }
194 break;
195 }
196 else if ch == '\\' {
197 state.advance(1);
198 if let Some(next) = state.peek() {
199 state.advance(next.len_utf8());
200 }
201 }
202 else {
203 state.advance(ch.len_utf8());
204 }
205 }
206 let kind = if quote == '\'' { MatlabSyntaxKind::Character } else { MatlabSyntaxKind::String };
207 state.add_token(kind, start_pos, state.get_position());
208 return true;
209 }
210 }
211 false
212 }
213
214 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
215 let start_pos = state.get_position();
216 if state.consume_if_starts_with("%") {
217 if state.consume_if_starts_with("{") {
218 let mut depth = 1;
219 while depth > 0 && state.not_at_end() {
220 if state.starts_with("%{") {
221 depth += 1;
222 state.advance(2);
223 }
224 else if state.starts_with("%}") {
225 depth -= 1;
226 state.advance(2);
227 }
228 else if let Some(ch) = state.current() {
229 state.advance(ch.len_utf8());
230 }
231 }
232 state.add_token(MatlabSyntaxKind::BlockComment, start_pos, state.get_position());
233 }
234 else {
235 state.take_while(|c| c != '\n' && c != '\r');
236 state.add_token(MatlabSyntaxKind::Comment, start_pos, state.get_position());
237 }
238 return true;
239 }
240 false
241 }
242
243 fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
244 let start_pos = state.get_position();
245 let ops = [".*", "./", ".^", ".\\", "==", "~=", "<=", ">=", "&&", "||", "++", "--", ".'"];
246 for op in ops {
247 if state.consume_if_starts_with(op) {
248 state.add_token(MatlabSyntaxKind::Operator, start_pos, state.get_position());
249 return true;
250 }
251 }
252
253 if let Some(ch) = state.peek() {
254 let kind = match ch {
255 '+' | '-' | '*' | '/' | '\\' | '^' | '<' | '>' | '=' | '~' | '&' | '|' | '\'' => MatlabSyntaxKind::Operator,
256 _ => return false,
257 };
258 state.advance(1);
259 state.add_token(kind, start_pos, state.get_position());
260 return true;
261 }
262 false
263 }
264
265 fn lex_delimiter<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
266 let start_pos = state.get_position();
267 if let Some(ch) = state.peek() {
268 let kind = match ch {
269 '(' | ')' | '[' | ']' | '{' | '}' | ';' | ',' | ':' | '?' | '@' | '.' => MatlabSyntaxKind::Delimiter,
270 _ => return false,
271 };
272 state.advance(1);
273 state.add_token(kind, start_pos, state.get_position());
274 true
275 }
276 else {
277 false
278 }
279 }
280}