1use crate::{kind::ObjectiveCSyntaxKind, language::ObjectiveCLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
/// Shorthand for the `oak_core` lexer state specialised to [`ObjectiveCLanguage`].
type State<'a, S> = LexerState<'a, S, ObjectiveCLanguage>;
5
/// Lexer that turns Objective-C source text into `ObjectiveCSyntaxKind` tokens.
#[derive(Clone)]
pub struct ObjectiveCLexer<'config> {
    /// Language configuration the lexer was created with.
    ///
    /// None of the lexing routines visible in this file read it, which is why
    /// the `dead_code` lint is silenced on this field.
    #[allow(dead_code)]
    config: &'config ObjectiveCLanguage,
}
11
12impl<'config> Lexer<ObjectiveCLanguage> for ObjectiveCLexer<'config> {
13 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ObjectiveCLanguage>) -> LexOutput<ObjectiveCLanguage> {
14 let mut state = State::new(source);
15 let result = self.run(&mut state);
16 if result.is_ok() {
17 state.add_eof();
18 }
19 state.finish_with_cache(result, cache)
20 }
21}
22
23impl<'config> ObjectiveCLexer<'config> {
    /// Creates a lexer that borrows the given language configuration.
    pub fn new(config: &'config ObjectiveCLanguage) -> Self {
        Self { config }
    }
27
28 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
30 while state.not_at_end() {
31 let safe_point = state.get_position();
32
33 if self.skip_whitespace(state) {
34 continue;
35 }
36
37 if self.skip_comment(state) {
38 continue;
39 }
40
41 if self.lex_string_literal(state) {
42 continue;
43 }
44
45 if self.lex_char_literal(state) {
46 continue;
47 }
48
49 if self.lex_number_literal(state) {
50 continue;
51 }
52
53 if self.lex_identifier_or_keyword(state) {
54 continue;
55 }
56
57 if self.lex_operators(state) {
58 continue;
59 }
60
61 if self.lex_single_char_tokens(state) {
62 continue;
63 }
64
65 let start_pos = state.get_position();
67 if let Some(ch) = state.peek() {
68 state.advance(ch.len_utf8());
69 state.add_token(ObjectiveCSyntaxKind::Error, start_pos, state.get_position());
70 }
71
72 state.advance_if_dead_lock(safe_point);
73 }
74
75 Ok(())
76 }
77
78 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
80 let start = state.get_position();
81 while let Some(ch) = state.peek() {
82 if ch.is_whitespace() {
83 state.advance(ch.len_utf8());
84 }
85 else {
86 break;
87 }
88 }
89 if state.get_position() > start {
90 state.add_token(ObjectiveCSyntaxKind::Whitespace, start, state.get_position());
91 true
92 }
93 else {
94 false
95 }
96 }
97
98 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
99 let start = state.get_position();
100 let rest = state.rest();
101 if rest.starts_with("//") {
103 state.advance(2);
104 while let Some(ch) = state.peek() {
105 if ch == '\n' || ch == '\r' {
106 break;
107 }
108 state.advance(ch.len_utf8());
109 }
110 state.add_token(ObjectiveCSyntaxKind::CommentToken, start, state.get_position());
111 return true;
112 }
113 if rest.starts_with("/*") {
115 state.advance(2);
116 let mut depth = 1usize;
117 while let Some(ch) = state.peek() {
118 if ch == '/' && state.peek_next_n(1) == Some('*') {
119 state.advance(2);
120 depth += 1;
121 continue;
122 }
123 if ch == '*' && state.peek_next_n(1) == Some('/') {
124 state.advance(2);
125 depth -= 1;
126 if depth == 0 {
127 break;
128 }
129 continue;
130 }
131 state.advance(ch.len_utf8());
132 }
133 state.add_token(ObjectiveCSyntaxKind::CommentToken, start, state.get_position());
134 return true;
135 }
136 false
137 }
138
139 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
140 let start = state.get_position();
141
142 if state.peek() == Some('@') && state.peek_next_n(1) == Some('"') {
144 state.advance(2); let mut escaped = false;
146 while let Some(ch) = state.peek() {
147 if ch == '"' && !escaped {
148 state.advance(1); break;
150 }
151 state.advance(ch.len_utf8());
152 if escaped {
153 escaped = false;
154 continue;
155 }
156 if ch == '\\' {
157 escaped = true;
158 continue;
159 }
160 if ch == '\n' || ch == '\r' {
161 break;
162 }
163 }
164 state.add_token(ObjectiveCSyntaxKind::String, start, state.get_position());
165 return true;
166 }
167
168 if state.peek() == Some('"') {
170 state.advance(1);
171 let mut escaped = false;
172 while let Some(ch) = state.peek() {
173 if ch == '"' && !escaped {
174 state.advance(1); break;
176 }
177 state.advance(ch.len_utf8());
178 if escaped {
179 escaped = false;
180 continue;
181 }
182 if ch == '\\' {
183 escaped = true;
184 continue;
185 }
186 if ch == '\n' || ch == '\r' {
187 break;
188 }
189 }
190 state.add_token(ObjectiveCSyntaxKind::String, start, state.get_position());
191 return true;
192 }
193
194 false
195 }
196
197 fn lex_char_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
198 let start = state.get_position();
199 if state.peek() != Some('\'') {
200 return false;
201 }
202
203 state.advance(1); if let Some('\\') = state.peek() {
205 state.advance(1);
206 if let Some(c) = state.peek() {
207 state.advance(c.len_utf8());
208 }
209 }
210 else if let Some(c) = state.peek() {
211 state.advance(c.len_utf8());
212 }
213 else {
214 state.set_position(start);
215 return false;
216 }
217
218 if state.peek() == Some('\'') {
219 state.advance(1);
220 state.add_token(ObjectiveCSyntaxKind::Character, start, state.get_position());
221 return true;
222 }
223
224 state.set_position(start);
225 false
226 }
227
228 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
229 let start = state.get_position();
230 let first = match state.peek() {
231 Some(c) => c,
232 None => return false,
233 };
234
235 if !first.is_ascii_digit() {
236 return false;
237 }
238
239 let mut is_float = false;
240
241 state.advance(1);
243 while let Some(c) = state.peek() {
244 if c.is_ascii_digit() {
245 state.advance(1);
246 }
247 else {
248 break;
249 }
250 }
251
252 if state.peek() == Some('.') {
254 let n1 = state.peek_next_n(1);
255 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
256 is_float = true;
257 state.advance(1); while let Some(c) = state.peek() {
259 if c.is_ascii_digit() {
260 state.advance(1);
261 }
262 else {
263 break;
264 }
265 }
266 }
267 }
268
269 if let Some(c) = state.peek() {
271 if c == 'e' || c == 'E' {
272 let n1 = state.peek_next_n(1);
273 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
274 is_float = true;
275 state.advance(1);
276 if let Some(sign) = state.peek() {
277 if sign == '+' || sign == '-' {
278 state.advance(1);
279 }
280 }
281 while let Some(d) = state.peek() {
282 if d.is_ascii_digit() {
283 state.advance(1);
284 }
285 else {
286 break;
287 }
288 }
289 }
290 }
291 }
292
293 while let Some(c) = state.peek() {
295 if c.is_ascii_alphabetic() {
296 state.advance(1);
297 }
298 else {
299 break;
300 }
301 }
302
303 let end = state.get_position();
304 state.add_token(if is_float { ObjectiveCSyntaxKind::FloatLiteral } else { ObjectiveCSyntaxKind::IntegerLiteral }, start, end);
305 true
306 }
307
308 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
309 let start = state.get_position();
310 let ch = match state.peek() {
311 Some(c) => c,
312 None => return false,
313 };
314
315 if !(ch.is_ascii_alphabetic() || ch == '_' || ch == '@' || ch == '#') {
316 return false;
317 }
318
319 state.advance(1);
320 while let Some(c) = state.peek() {
321 if c.is_ascii_alphanumeric() || c == '_' {
322 state.advance(1);
323 }
324 else {
325 break;
326 }
327 }
328
329 let end = state.get_position();
330 let text = state.get_text_in(oak_core::Range { start, end });
331 let kind = match text.as_ref() {
332 "@interface" => ObjectiveCSyntaxKind::InterfaceKeyword,
334 "@implementation" => ObjectiveCSyntaxKind::ImplementationKeyword,
335 "@end" => ObjectiveCSyntaxKind::EndKeyword,
336 "@property" => ObjectiveCSyntaxKind::PropertyKeyword,
337 "@synthesize" => ObjectiveCSyntaxKind::SynthesizeKeyword,
338 "@dynamic" => ObjectiveCSyntaxKind::DynamicKeyword,
339 "@protocol" => ObjectiveCSyntaxKind::ProtocolKeyword,
340 "@import" => ObjectiveCSyntaxKind::ImportKeyword,
341 "#import" => ObjectiveCSyntaxKind::ImportKeyword,
342 "#include" => ObjectiveCSyntaxKind::IncludeKeyword,
343
344 "if" => ObjectiveCSyntaxKind::IfKeyword,
346 "else" => ObjectiveCSyntaxKind::ElseKeyword,
347 "for" => ObjectiveCSyntaxKind::ForKeyword,
348 "while" => ObjectiveCSyntaxKind::WhileKeyword,
349 "do" => ObjectiveCSyntaxKind::DoKeyword,
350 "switch" => ObjectiveCSyntaxKind::SwitchKeyword,
351 "case" => ObjectiveCSyntaxKind::CaseKeyword,
352 "default" => ObjectiveCSyntaxKind::DefaultKeyword,
353 "break" => ObjectiveCSyntaxKind::BreakKeyword,
354 "continue" => ObjectiveCSyntaxKind::ContinueKeyword,
355 "return" => ObjectiveCSyntaxKind::ReturnKeyword,
356 "void" => ObjectiveCSyntaxKind::VoidKeyword,
357 "int" => ObjectiveCSyntaxKind::IntKeyword,
358 "float" => ObjectiveCSyntaxKind::FloatKeyword,
359 "double" => ObjectiveCSyntaxKind::DoubleKeyword,
360 "char" => ObjectiveCSyntaxKind::CharKeyword,
361 "BOOL" => ObjectiveCSyntaxKind::BoolKeyword,
362 "id" => ObjectiveCSyntaxKind::IdKeyword,
363 "self" => ObjectiveCSyntaxKind::SelfKeyword,
364 "super" => ObjectiveCSyntaxKind::SuperKeyword,
365 "nil" => ObjectiveCSyntaxKind::NilKeyword,
366 "YES" => ObjectiveCSyntaxKind::YesKeyword,
367 "NO" => ObjectiveCSyntaxKind::NoKeyword,
368
369 _ => ObjectiveCSyntaxKind::Identifier,
370 };
371
372 state.add_token(kind, start, state.get_position());
373 true
374 }
375
376 fn lex_operators<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
377 let start = state.get_position();
378 let rest = state.rest();
379
380 let patterns: &[(&str, ObjectiveCSyntaxKind)] =
382 &[("==", ObjectiveCSyntaxKind::EqualEqual), ("!=", ObjectiveCSyntaxKind::NotEqual), (">=", ObjectiveCSyntaxKind::GreaterEqual), ("<=", ObjectiveCSyntaxKind::LessEqual), ("&&", ObjectiveCSyntaxKind::And), ("||", ObjectiveCSyntaxKind::Or)];
383
384 for (pat, kind) in patterns {
385 if rest.starts_with(pat) {
386 state.advance(pat.len());
387 state.add_token(*kind, start, state.get_position());
388 return true;
389 }
390 }
391
392 if let Some(ch) = state.peek() {
393 let kind = match ch {
394 '+' => Some(ObjectiveCSyntaxKind::Plus),
395 '-' => Some(ObjectiveCSyntaxKind::Minus),
396 '*' => Some(ObjectiveCSyntaxKind::Star),
397 '/' => Some(ObjectiveCSyntaxKind::Slash),
398 '%' => Some(ObjectiveCSyntaxKind::Percent),
399 '=' => Some(ObjectiveCSyntaxKind::Equal),
400 '>' => Some(ObjectiveCSyntaxKind::Greater),
401 '<' => Some(ObjectiveCSyntaxKind::Less),
402 '!' => Some(ObjectiveCSyntaxKind::Not),
403 '?' => Some(ObjectiveCSyntaxKind::Question),
404 ':' => Some(ObjectiveCSyntaxKind::Colon),
405 '.' => Some(ObjectiveCSyntaxKind::Dot),
406 _ => None,
407 };
408
409 if let Some(k) = kind {
410 state.advance(ch.len_utf8());
411 state.add_token(k, start, state.get_position());
412 return true;
413 }
414 }
415
416 false
417 }
418
419 fn lex_single_char_tokens<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
420 let start = state.get_position();
421 if let Some(ch) = state.peek() {
422 let kind = match ch {
423 '(' => ObjectiveCSyntaxKind::LeftParen,
424 ')' => ObjectiveCSyntaxKind::RightParen,
425 '[' => ObjectiveCSyntaxKind::LeftBracket,
426 ']' => ObjectiveCSyntaxKind::RightBracket,
427 '{' => ObjectiveCSyntaxKind::LeftBrace,
428 '}' => ObjectiveCSyntaxKind::RightBrace,
429 ',' => ObjectiveCSyntaxKind::Comma,
430 ';' => ObjectiveCSyntaxKind::Semicolon,
431 '@' => ObjectiveCSyntaxKind::At,
432 _ => return false,
433 };
434
435 state.advance(ch.len_utf8());
436 state.add_token(kind, start, state.get_position());
437 true
438 }
439 else {
440 false
441 }
442 }
443}