Skip to main content

oak_matlab/lexer/
mod.rs

1use crate::{kind::MatlabSyntaxKind, language::MatlabLanguage};
2use oak_core::{
3    Lexer, LexerState,
4    lexer::{LexOutput, LexerCache},
5    source::{Source, TextEdit},
6};
7
8type State<'s, S> = LexerState<'s, S, MatlabLanguage>;
9
/// Lexer that splits MATLAB source text into `MatlabSyntaxKind` tokens.
#[derive(Clone)]
pub struct MatlabLexer<'config> {
    /// Borrowed language configuration. It is stored at construction time but
    /// is not read by any code visible in this file.
    _config: &'config MatlabLanguage,
}
14
15impl<'config> Lexer<MatlabLanguage> for MatlabLexer<'config> {
16    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<MatlabLanguage>) -> LexOutput<MatlabLanguage> {
17        let mut state: State<'_, S> = LexerState::new(source);
18        let result = self.run(&mut state);
19        if result.is_ok() {
20            state.add_eof();
21        }
22        state.finish_with_cache(result, cache)
23    }
24}
25
26impl<'config> MatlabLexer<'config> {
27    pub fn new(config: &'config MatlabLanguage) -> Self {
28        Self { _config: config }
29    }
30
31    fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), oak_core::OakError> {
32        while state.not_at_end() {
33            let safe_point = state.get_position();
34
35            if self.skip_whitespace(state) {
36                continue;
37            }
38
39            if self.lex_newline(state) {
40                continue;
41            }
42
43            if self.lex_comment(state) {
44                continue;
45            }
46
47            if self.lex_string(state) {
48                continue;
49            }
50
51            if self.lex_number(state) {
52                continue;
53            }
54
55            if self.lex_identifier(state) {
56                continue;
57            }
58
59            if self.lex_operator(state) {
60                continue;
61            }
62
63            if self.lex_delimiter(state) {
64                continue;
65            }
66
67            let start_pos = state.get_position();
68            if let Some(ch) = state.peek() {
69                state.advance(ch.len_utf8());
70                state.add_token(MatlabSyntaxKind::Error, start_pos, state.get_position());
71            }
72
73            state.advance_if_dead_lock(safe_point);
74        }
75
76        Ok(())
77    }
78
79    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
80        let start_pos = state.get_position();
81        while let Some(ch) = state.peek() {
82            if ch == ' ' || ch == '\t' {
83                state.advance(ch.len_utf8());
84            }
85            else {
86                break;
87            }
88        }
89        if state.get_position() > start_pos {
90            state.add_token(MatlabSyntaxKind::Whitespace, start_pos, state.get_position());
91            true
92        }
93        else {
94            false
95        }
96    }
97
98    fn lex_newline<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
99        let start_pos = state.get_position();
100        if state.consume_if_starts_with("\n") || state.consume_if_starts_with("\r\n") || state.consume_if_starts_with("\r") {
101            state.add_token(MatlabSyntaxKind::Newline, start_pos, state.get_position());
102            true
103        }
104        else {
105            false
106        }
107    }
108
109    fn lex_identifier<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
110        let start_pos = state.get_position();
111        if let Some(ch) = state.peek() {
112            if ch.is_ascii_alphabetic() || ch == '_' {
113                state.advance(ch.len_utf8());
114                state.take_while(|c| c.is_ascii_alphanumeric() || c == '_');
115
116                let text = state.get_text_in((start_pos..state.get_position()).into());
117                let token_kind = match text.as_ref() {
118                    "function" => MatlabSyntaxKind::Function,
119                    "end" => MatlabSyntaxKind::End,
120                    "if" => MatlabSyntaxKind::If,
121                    "else" => MatlabSyntaxKind::Else,
122                    "elseif" => MatlabSyntaxKind::Elseif,
123                    "while" => MatlabSyntaxKind::While,
124                    "for" => MatlabSyntaxKind::For,
125                    "break" => MatlabSyntaxKind::Break,
126                    "continue" => MatlabSyntaxKind::Continue,
127                    "return" => MatlabSyntaxKind::Return,
128                    "switch" => MatlabSyntaxKind::Switch,
129                    "case" => MatlabSyntaxKind::Case,
130                    "otherwise" => MatlabSyntaxKind::Otherwise,
131                    "try" => MatlabSyntaxKind::Try,
132                    "catch" => MatlabSyntaxKind::Catch,
133                    "global" => MatlabSyntaxKind::Global,
134                    "persistent" => MatlabSyntaxKind::Persistent,
135                    "classdef" => MatlabSyntaxKind::Classdef,
136                    "properties" => MatlabSyntaxKind::Properties,
137                    "methods" => MatlabSyntaxKind::Methods,
138                    "events" => MatlabSyntaxKind::Events,
139                    _ => MatlabSyntaxKind::Identifier,
140                };
141
142                state.add_token(token_kind, start_pos, state.get_position());
143                return true;
144            }
145        }
146        false
147    }
148
149    fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
150        let start_pos = state.get_position();
151        if let Some(ch) = state.peek() {
152            if ch.is_ascii_digit() || (ch == '.' && state.peek_next_n(1).map(|c| c.is_ascii_digit()).unwrap_or(false)) {
153                if ch == '.' {
154                    state.advance(1);
155                }
156                state.take_while(|c| c.is_ascii_digit());
157
158                if ch != '.' && state.consume_if_starts_with(".") {
159                    state.take_while(|c| c.is_ascii_digit());
160                }
161
162                if state.consume_if_starts_with("e") || state.consume_if_starts_with("E") {
163                    if let Some(sign) = state.peek() {
164                        if sign == '+' || sign == '-' {
165                            state.advance(1);
166                        }
167                    }
168                    state.take_while(|c| c.is_ascii_digit());
169                }
170
171                if state.consume_if_starts_with("i") || state.consume_if_starts_with("j") {
172                    // complex
173                }
174
175                state.add_token(MatlabSyntaxKind::Number, start_pos, state.get_position());
176                return true;
177            }
178        }
179        false
180    }
181
182    fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
183        let start_pos = state.get_position();
184        if let Some(quote) = state.peek() {
185            if quote == '\'' || quote == '"' {
186                state.advance(1);
187                while let Some(ch) = state.peek() {
188                    if ch == quote {
189                        state.advance(1);
190                        if state.peek() == Some(quote) {
191                            state.advance(1);
192                            continue;
193                        }
194                        break;
195                    }
196                    else if ch == '\\' {
197                        state.advance(1);
198                        if let Some(next) = state.peek() {
199                            state.advance(next.len_utf8());
200                        }
201                    }
202                    else {
203                        state.advance(ch.len_utf8());
204                    }
205                }
206                let kind = if quote == '\'' { MatlabSyntaxKind::Character } else { MatlabSyntaxKind::String };
207                state.add_token(kind, start_pos, state.get_position());
208                return true;
209            }
210        }
211        false
212    }
213
214    fn lex_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
215        let start_pos = state.get_position();
216        if state.consume_if_starts_with("%") {
217            if state.consume_if_starts_with("{") {
218                let mut depth = 1;
219                while depth > 0 && state.not_at_end() {
220                    if state.starts_with("%{") {
221                        depth += 1;
222                        state.advance(2);
223                    }
224                    else if state.starts_with("%}") {
225                        depth -= 1;
226                        state.advance(2);
227                    }
228                    else if let Some(ch) = state.current() {
229                        state.advance(ch.len_utf8());
230                    }
231                }
232                state.add_token(MatlabSyntaxKind::BlockComment, start_pos, state.get_position());
233            }
234            else {
235                state.take_while(|c| c != '\n' && c != '\r');
236                state.add_token(MatlabSyntaxKind::Comment, start_pos, state.get_position());
237            }
238            return true;
239        }
240        false
241    }
242
243    fn lex_operator<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
244        let start_pos = state.get_position();
245        let ops = [".*", "./", ".^", ".\\", "==", "~=", "<=", ">=", "&&", "||", "++", "--", ".'"];
246        for op in ops {
247            if state.consume_if_starts_with(op) {
248                state.add_token(MatlabSyntaxKind::Operator, start_pos, state.get_position());
249                return true;
250            }
251        }
252
253        if let Some(ch) = state.peek() {
254            let kind = match ch {
255                '+' | '-' | '*' | '/' | '\\' | '^' | '<' | '>' | '=' | '~' | '&' | '|' | '\'' => MatlabSyntaxKind::Operator,
256                _ => return false,
257            };
258            state.advance(1);
259            state.add_token(kind, start_pos, state.get_position());
260            return true;
261        }
262        false
263    }
264
265    fn lex_delimiter<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
266        let start_pos = state.get_position();
267        if let Some(ch) = state.peek() {
268            let kind = match ch {
269                '(' | ')' | '[' | ']' | '{' | '}' | ';' | ',' | ':' | '?' | '@' | '.' => MatlabSyntaxKind::Delimiter,
270                _ => return false,
271            };
272            state.advance(1);
273            state.add_token(kind, start_pos, state.get_position());
274            true
275        }
276        else {
277            false
278        }
279    }
280}