1use crate::{kind::MatlabSyntaxKind, language::MatlabLanguage};
2use oak_core::{
3 Lexer, LexerState,
4 lexer::{LexOutput, LexerCache},
5 source::{Source, TextEdit},
6};
7
8type State<'s, S> = LexerState<'s, S, MatlabLanguage>;
9
/// Lexer for MATLAB source text.
///
/// Borrows a [`MatlabLanguage`] configuration for the lifetime `'config`;
/// instances are created through [`MatlabLexer::new`].
pub struct MatlabLexer<'config> {
    /// Language configuration handed to the constructor. None of the lexing
    /// routines in this file read it (hence the leading underscore).
    _config: &'config MatlabLanguage,
}
13
14impl<'config> Lexer<MatlabLanguage> for MatlabLexer<'config> {
15 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MatlabLanguage>) -> LexOutput<MatlabLanguage> {
16 let mut state: State<'_, S> = LexerState::new(source);
17 let result = self.run(&mut state);
18 if result.is_ok() {
19 state.add_eof();
20 }
21 state.finish_with_cache(result, cache)
22 }
23}
24
25impl<'config> MatlabLexer<'config> {
26 pub fn new(config: &'config MatlabLanguage) -> Self {
27 Self { _config: config }
28 }
29
30 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
31 while state.not_at_end() {
32 let safe_point = state.get_position();
33
34 if self.skip_whitespace(state) {
35 continue;
36 }
37
38 if self.lex_newline(state) {
39 continue;
40 }
41
42 if self.lex_comment(state) {
43 continue;
44 }
45
46 if self.lex_string(state) {
47 continue;
48 }
49
50 if self.lex_number(state) {
51 continue;
52 }
53
54 if self.lex_identifier(state) {
55 continue;
56 }
57
58 if self.lex_operator(state) {
59 continue;
60 }
61
62 if self.lex_delimiter(state) {
63 continue;
64 }
65
66 let start_pos = state.get_position();
67 if let Some(ch) = state.peek() {
68 state.advance(ch.len_utf8());
69 state.add_token(MatlabSyntaxKind::Error, start_pos, state.get_position());
70 }
71
72 state.advance_if_dead_lock(safe_point);
73 }
74
75 Ok(())
76 }
77
78 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
79 let start_pos = state.get_position();
80 while let Some(ch) = state.peek() {
81 if ch == ' ' || ch == '\t' {
82 state.advance(ch.len_utf8());
83 }
84 else {
85 break;
86 }
87 }
88 if state.get_position() > start_pos {
89 state.add_token(MatlabSyntaxKind::Whitespace, start_pos, state.get_position());
90 true
91 }
92 else {
93 false
94 }
95 }
96
97 fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
98 let start_pos = state.get_position();
99 if state.consume_if_starts_with("\n") || state.consume_if_starts_with("\r\n") || state.consume_if_starts_with("\r") {
100 state.add_token(MatlabSyntaxKind::Newline, start_pos, state.get_position());
101 true
102 }
103 else {
104 false
105 }
106 }
107
108 fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
109 let start_pos = state.get_position();
110 if let Some(ch) = state.peek() {
111 if ch.is_ascii_alphabetic() || ch == '_' {
112 state.advance(ch.len_utf8());
113 state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
114
115 let text = state.get_text_in((start_pos..state.get_position()).into());
116 let token_kind = match text.as_ref() {
117 "function" => MatlabSyntaxKind::Function,
118 "end" => MatlabSyntaxKind::End,
119 "if" => MatlabSyntaxKind::If,
120 "else" => MatlabSyntaxKind::Else,
121 "elseif" => MatlabSyntaxKind::Elseif,
122 "while" => MatlabSyntaxKind::While,
123 "for" => MatlabSyntaxKind::For,
124 "break" => MatlabSyntaxKind::Break,
125 "continue" => MatlabSyntaxKind::Continue,
126 "return" => MatlabSyntaxKind::Return,
127 "switch" => MatlabSyntaxKind::Switch,
128 "case" => MatlabSyntaxKind::Case,
129 "otherwise" => MatlabSyntaxKind::Otherwise,
130 "try" => MatlabSyntaxKind::Try,
131 "catch" => MatlabSyntaxKind::Catch,
132 "global" => MatlabSyntaxKind::Global,
133 "persistent" => MatlabSyntaxKind::Persistent,
134 "classdef" => MatlabSyntaxKind::Classdef,
135 "properties" => MatlabSyntaxKind::Properties,
136 "methods" => MatlabSyntaxKind::Methods,
137 "events" => MatlabSyntaxKind::Events,
138 _ => MatlabSyntaxKind::Identifier,
139 };
140
141 state.add_token(token_kind, start_pos, state.get_position());
142 return true;
143 }
144 }
145 false
146 }
147
148 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
149 let start_pos = state.get_position();
150 if let Some(ch) = state.peek() {
151 if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map(|c| c.is_ascii_digit()).unwrap_or(false)) {
152 if ch == '.' {
153 state.advance(1);
154 }
155 state.take_while(|c| c.is_ascii_digit());
156
157 if ch != '.' && state.consume_if_starts_with(".") {
158 state.take_while(|c| c.is_ascii_digit());
159 }
160
161 if state.consume_if_starts_with("e") || state.consume_if_starts_with("E") {
162 if let Some(sign) = state.peek() {
163 if sign == '+' || sign == '-' {
164 state.advance(1);
165 }
166 }
167 state.take_while(|c| c.is_ascii_digit());
168 }
169
170 if state.consume_if_starts_with("i") || state.consume_if_starts_with("j") {
171 }
173
174 state.add_token(MatlabSyntaxKind::Number, start_pos, state.get_position());
175 return true;
176 }
177 }
178 false
179 }
180
181 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
182 let start_pos = state.get_position();
183 if let Some(quote) = state.peek() {
184 if quote == '\'' || quote == '"' {
185 state.advance(1);
186 while let Some(ch) = state.peek() {
187 if ch == quote {
188 state.advance(1);
189 if state.peek() == Some(quote) {
190 state.advance(1);
191 continue;
192 }
193 break;
194 }
195 else if ch == '\\' {
196 state.advance(1);
197 if let Some(next) = state.peek() {
198 state.advance(next.len_utf8());
199 }
200 }
201 else {
202 state.advance(ch.len_utf8());
203 }
204 }
205 let kind = if quote == '\'' { MatlabSyntaxKind::Character } else { MatlabSyntaxKind::String };
206 state.add_token(kind, start_pos, state.get_position());
207 return true;
208 }
209 }
210 false
211 }
212
213 fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
214 let start_pos = state.get_position();
215 if state.consume_if_starts_with("%") {
216 if state.consume_if_starts_with("{") {
217 let mut depth = 1;
218 while depth > 0 && state.not_at_end() {
219 if state.starts_with("%{") {
220 depth += 1;
221 state.advance(2);
222 }
223 else if state.starts_with("%}") {
224 depth -= 1;
225 state.advance(2);
226 }
227 else if let Some(ch) = state.current() {
228 state.advance(ch.len_utf8());
229 }
230 }
231 state.add_token(MatlabSyntaxKind::BlockComment, start_pos, state.get_position());
232 }
233 else {
234 state.take_while(|c| c != '\n' && c != '\r');
235 state.add_token(MatlabSyntaxKind::Comment, start_pos, state.get_position());
236 }
237 return true;
238 }
239 false
240 }
241
242 fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
243 let start_pos = state.get_position();
244 let ops = [".*", "./", ".^", ".\\", "==", "~=", "<=", ">=", "&&", "||", "++", "--", ".'"];
245 for op in ops {
246 if state.consume_if_starts_with(op) {
247 state.add_token(MatlabSyntaxKind::Operator, start_pos, state.get_position());
248 return true;
249 }
250 }
251
252 if let Some(ch) = state.peek() {
253 let kind = match ch {
254 '+' | '-' | '*' | '/' | '\\' | '^' | '<' | '>' | '=' | '~' | '&' | '|' | '\'' => MatlabSyntaxKind::Operator,
255 _ => return false,
256 };
257 state.advance(1);
258 state.add_token(kind, start_pos, state.get_position());
259 return true;
260 }
261 false
262 }
263
264 fn lex_delimiter<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
265 let start_pos = state.get_position();
266 if let Some(ch) = state.peek() {
267 let kind = match ch {
268 '(' | ')' | '[' | ']' | '{' | '}' | ';' | ',' | ':' | '?' | '@' | '.' => MatlabSyntaxKind::Delimiter,
269 _ => return false,
270 };
271 state.advance(1);
272 state.add_token(kind, start_pos, state.get_position());
273 true
274 }
275 else {
276 false
277 }
278 }
279}