1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5use crate::{language::ObjectiveCLanguage, lexer::token_type::ObjectiveCTokenType};
6use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
7
/// Crate-local alias for the shared lexer state, specialised to Objective-C.
pub(crate) type State<'a, S> = LexerState<'a, S, ObjectiveCLanguage>;
9
/// Hand-written lexer for Objective-C source text.
///
/// Borrows the language configuration for its lifetime; the configuration is
/// not read anywhere in this file yet (hence the `dead_code` allowance) but is
/// kept so future lexing options have a home.
#[derive(Clone)]
pub struct ObjectiveCLexer<'config> {
    // NOTE(review): currently unused — retained for future configuration.
    #[allow(dead_code)]
    config: &'config ObjectiveCLanguage,
}
16
17impl<'config> Lexer<ObjectiveCLanguage> for ObjectiveCLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<ObjectiveCLanguage>) -> LexOutput<ObjectiveCLanguage> {
19 let mut state = State::new(source);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof();
23 }
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> ObjectiveCLexer<'config> {
29 pub fn new(config: &'config ObjectiveCLanguage) -> Self {
31 Self { config }
32 }
33
34 fn run<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 let safe_point = state.get_position();
38
39 if self.skip_whitespace(state) {
40 continue;
41 }
42
43 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_string_literal(state) {
48 continue;
49 }
50
51 if self.lex_char_literal(state) {
52 continue;
53 }
54
55 if self.lex_number_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_operators(state) {
64 continue;
65 }
66
67 if self.lex_single_char_tokens(state) {
68 continue;
69 }
70
71 let start_pos = state.get_position();
73 if let Some(ch) = state.peek() {
74 state.advance(ch.len_utf8());
75 state.add_token(ObjectiveCTokenType::Error, start_pos, state.get_position());
76 }
77
78 state.advance_if_dead_lock(safe_point);
79 }
80
81 Ok(())
82 }
83
84 fn skip_whitespace<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
86 let start = state.get_position();
87 while let Some(ch) = state.peek() {
88 if ch.is_whitespace() {
89 state.advance(ch.len_utf8());
90 }
91 else {
92 break;
93 }
94 }
95 if state.get_position() > start {
96 state.add_token(ObjectiveCTokenType::Whitespace, start, state.get_position());
97 true
98 }
99 else {
100 false
101 }
102 }
103
104 fn skip_comment<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
105 let start = state.get_position();
106 let rest = state.rest();
107 if rest.starts_with("//") {
109 state.advance(2);
110 while let Some(ch) = state.peek() {
111 if ch == '\n' || ch == '\r' {
112 break;
113 }
114 state.advance(ch.len_utf8());
115 }
116 state.add_token(ObjectiveCTokenType::CommentToken, start, state.get_position());
117 return true;
118 }
119 if rest.starts_with("/*") {
121 state.advance(2);
122 let mut depth = 1usize;
123 while let Some(ch) = state.peek() {
124 if ch == '/' && state.peek_next_n(1) == Some('*') {
125 state.advance(2);
126 depth += 1;
127 continue;
128 }
129 if ch == '*' && state.peek_next_n(1) == Some('/') {
130 state.advance(2);
131 depth -= 1;
132 if depth == 0 {
133 break;
134 }
135 continue;
136 }
137 state.advance(ch.len_utf8());
138 }
139 state.add_token(ObjectiveCTokenType::CommentToken, start, state.get_position());
140 return true;
141 }
142 false
143 }
144
145 fn lex_string_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
146 let start = state.get_position();
147
148 if state.peek() == Some('@') && state.peek_next_n(1) == Some('"') {
150 state.advance(2); let mut escaped = false;
152 while let Some(ch) = state.peek() {
153 if ch == '"' && !escaped {
154 state.advance(1); break;
156 }
157 state.advance(ch.len_utf8());
158 if escaped {
159 escaped = false;
160 continue;
161 }
162 if ch == '\\' {
163 escaped = true;
164 continue;
165 }
166 if ch == '\n' || ch == '\r' {
167 break;
168 }
169 }
170 state.add_token(ObjectiveCTokenType::String, start, state.get_position());
171 return true;
172 }
173
174 if state.peek() == Some('"') {
176 state.advance(1);
177 let mut escaped = false;
178 while let Some(ch) = state.peek() {
179 if ch == '"' && !escaped {
180 state.advance(1); break;
182 }
183 state.advance(ch.len_utf8());
184 if escaped {
185 escaped = false;
186 continue;
187 }
188 if ch == '\\' {
189 escaped = true;
190 continue;
191 }
192 if ch == '\n' || ch == '\r' {
193 break;
194 }
195 }
196 state.add_token(ObjectiveCTokenType::String, start, state.get_position());
197 return true;
198 }
199
200 false
201 }
202
203 fn lex_char_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
204 let start = state.get_position();
205 if state.peek() != Some('\'') {
206 return false;
207 }
208
209 state.advance(1); if let Some('\\') = state.peek() {
211 state.advance(1);
212 if let Some(c) = state.peek() {
213 state.advance(c.len_utf8());
214 }
215 }
216 else if let Some(c) = state.peek() {
217 state.advance(c.len_utf8());
218 }
219 else {
220 state.set_position(start);
221 return false;
222 }
223
224 if state.peek() == Some('\'') {
225 state.advance(1);
226 state.add_token(ObjectiveCTokenType::Character, start, state.get_position());
227 return true;
228 }
229
230 state.set_position(start);
231 false
232 }
233
234 fn lex_number_literal<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
235 let start = state.get_position();
236 let first = match state.peek() {
237 Some(c) => c,
238 None => return false,
239 };
240
241 if !first.is_ascii_digit() {
242 return false;
243 }
244
245 let mut is_float = false;
246
247 state.advance(1);
249 while let Some(c) = state.peek() {
250 if c.is_ascii_digit() {
251 state.advance(1);
252 }
253 else {
254 break;
255 }
256 }
257
258 if state.peek() == Some('.') {
260 let n1 = state.peek_next_n(1);
261 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
262 is_float = true;
263 state.advance(1); while let Some(c) = state.peek() {
265 if c.is_ascii_digit() {
266 state.advance(1);
267 }
268 else {
269 break;
270 }
271 }
272 }
273 }
274
275 if let Some(c) = state.peek() {
277 if c == 'e' || c == 'E' {
278 let n1 = state.peek_next_n(1);
279 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
280 is_float = true;
281 state.advance(1);
282 if let Some(sign) = state.peek() {
283 if sign == '+' || sign == '-' {
284 state.advance(1);
285 }
286 }
287 while let Some(d) = state.peek() {
288 if d.is_ascii_digit() {
289 state.advance(1);
290 }
291 else {
292 break;
293 }
294 }
295 }
296 }
297 }
298
299 while let Some(c) = state.peek() {
301 if c.is_ascii_alphabetic() {
302 state.advance(1);
303 }
304 else {
305 break;
306 }
307 }
308
309 let end = state.get_position();
310 state.add_token(if is_float { ObjectiveCTokenType::FloatLiteral } else { ObjectiveCTokenType::IntegerLiteral }, start, end);
311 true
312 }
313
314 fn lex_identifier_or_keyword<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
315 let start = state.get_position();
316 let ch = match state.peek() {
317 Some(c) => c,
318 None => return false,
319 };
320
321 if !(ch.is_ascii_alphabetic() || ch == '_' || ch == '@' || ch == '#') {
322 return false;
323 }
324
325 state.advance(1);
326 while let Some(c) = state.peek() {
327 if c.is_ascii_alphanumeric() || c == '_' {
328 state.advance(1);
329 }
330 else {
331 break;
332 }
333 }
334
335 let end = state.get_position();
336 let text = state.get_text_in(oak_core::Range { start, end });
337 let kind = match text.as_ref() {
338 "@interface" => ObjectiveCTokenType::InterfaceKeyword,
340 "@implementation" => ObjectiveCTokenType::ImplementationKeyword,
341 "@end" => ObjectiveCTokenType::EndKeyword,
342 "@property" => ObjectiveCTokenType::PropertyKeyword,
343 "@synthesize" => ObjectiveCTokenType::SynthesizeKeyword,
344 "@dynamic" => ObjectiveCTokenType::DynamicKeyword,
345 "@protocol" => ObjectiveCTokenType::ProtocolKeyword,
346 "@import" => ObjectiveCTokenType::ImportKeyword,
347 "#import" => ObjectiveCTokenType::ImportKeyword,
348 "#include" => ObjectiveCTokenType::IncludeKeyword,
349
350 "if" => ObjectiveCTokenType::IfKeyword,
352 "else" => ObjectiveCTokenType::ElseKeyword,
353 "for" => ObjectiveCTokenType::ForKeyword,
354 "while" => ObjectiveCTokenType::WhileKeyword,
355 "do" => ObjectiveCTokenType::DoKeyword,
356 "switch" => ObjectiveCTokenType::SwitchKeyword,
357 "case" => ObjectiveCTokenType::CaseKeyword,
358 "default" => ObjectiveCTokenType::DefaultKeyword,
359 "break" => ObjectiveCTokenType::BreakKeyword,
360 "continue" => ObjectiveCTokenType::ContinueKeyword,
361 "return" => ObjectiveCTokenType::ReturnKeyword,
362 "void" => ObjectiveCTokenType::VoidKeyword,
363 "int" => ObjectiveCTokenType::IntKeyword,
364 "float" => ObjectiveCTokenType::FloatKeyword,
365 "double" => ObjectiveCTokenType::DoubleKeyword,
366 "char" => ObjectiveCTokenType::CharKeyword,
367 "BOOL" => ObjectiveCTokenType::BoolKeyword,
368 "id" => ObjectiveCTokenType::IdKeyword,
369 "self" => ObjectiveCTokenType::SelfKeyword,
370 "super" => ObjectiveCTokenType::SuperKeyword,
371 "nil" => ObjectiveCTokenType::NilKeyword,
372 "YES" => ObjectiveCTokenType::YesKeyword,
373 "NO" => ObjectiveCTokenType::NoKeyword,
374
375 _ => ObjectiveCTokenType::Identifier,
376 };
377
378 state.add_token(kind, start, state.get_position());
379 true
380 }
381
382 fn lex_operators<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
383 let start = state.get_position();
384 let rest = state.rest();
385
386 let patterns: &[(&str, ObjectiveCTokenType)] =
388 &[("==", ObjectiveCTokenType::EqualEqual), ("!=", ObjectiveCTokenType::NotEqual), (">=", ObjectiveCTokenType::GreaterEqual), ("<=", ObjectiveCTokenType::LessEqual), ("&&", ObjectiveCTokenType::And), ("||", ObjectiveCTokenType::Or)];
389
390 for (pat, kind) in patterns {
391 if rest.starts_with(pat) {
392 state.advance(pat.len());
393 state.add_token(*kind, start, state.get_position());
394 return true;
395 }
396 }
397
398 if let Some(ch) = state.peek() {
399 let kind = match ch {
400 '+' => Some(ObjectiveCTokenType::Plus),
401 '-' => Some(ObjectiveCTokenType::Minus),
402 '*' => Some(ObjectiveCTokenType::Star),
403 '/' => Some(ObjectiveCTokenType::Slash),
404 '%' => Some(ObjectiveCTokenType::Percent),
405 '=' => Some(ObjectiveCTokenType::Equal),
406 '>' => Some(ObjectiveCTokenType::Greater),
407 '<' => Some(ObjectiveCTokenType::Less),
408 '!' => Some(ObjectiveCTokenType::Not),
409 '?' => Some(ObjectiveCTokenType::Question),
410 ':' => Some(ObjectiveCTokenType::Colon),
411 '.' => Some(ObjectiveCTokenType::Dot),
412 _ => None,
413 };
414
415 if let Some(k) = kind {
416 state.advance(ch.len_utf8());
417 state.add_token(k, start, state.get_position());
418 return true;
419 }
420 }
421
422 false
423 }
424
425 fn lex_single_char_tokens<S: Source + ?Sized>(&self, state: &mut State<'_, S>) -> bool {
426 let start = state.get_position();
427 if let Some(ch) = state.peek() {
428 let kind = match ch {
429 '(' => ObjectiveCTokenType::LeftParen,
430 ')' => ObjectiveCTokenType::RightParen,
431 '[' => ObjectiveCTokenType::LeftBracket,
432 ']' => ObjectiveCTokenType::RightBracket,
433 '{' => ObjectiveCTokenType::LeftBrace,
434 '}' => ObjectiveCTokenType::RightBrace,
435 ',' => ObjectiveCTokenType::Comma,
436 ';' => ObjectiveCTokenType::Semicolon,
437 '@' => ObjectiveCTokenType::At,
438 _ => return false,
439 };
440
441 state.advance(ch.len_utf8());
442 state.add_token(kind, start, state.get_position());
443 true
444 }
445 else {
446 false
447 }
448 }
449}