1pub mod token_type;
2
3pub use token_type::AdaTokenType;
4
5use crate::language::AdaLanguage;
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError,
8 lexer::{LexOutput, WhitespaceConfig},
9 source::Source,
10};
11use std::sync::LazyLock;
12
13type State<'a, S> = LexerState<'a, S, AdaLanguage>;
14
15static ADA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16
17#[derive(Clone, Debug)]
18pub struct AdaLexer<'config> {
19 config: &'config AdaLanguage,
20}
21
22impl<'config> Lexer<AdaLanguage> for AdaLexer<'config> {
23 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<AdaLanguage>) -> LexOutput<AdaLanguage> {
24 let mut state: State<'_, S> = LexerState::new_with_cache(source, 0, cache);
25 let result = self.run(&mut state);
26 if result.is_ok() {
27 state.add_eof();
28 }
29 state.finish_with_cache(result, cache)
30 }
31}
32
33impl<'config> AdaLexer<'config> {
34 pub fn new(config: &'config AdaLanguage) -> Self {
35 Self { config }
36 }
37
38 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 let safe_point = state.get_position();
42
43 if self.skip_whitespace(state) {
44 continue;
45 }
46
47 if self.skip_comment(state) {
48 continue;
49 }
50
51 if self.lex_string_literal(state) {
52 continue;
53 }
54
55 if self.lex_char_literal(state) {
56 continue;
57 }
58
59 if self.lex_number_literal(state) {
60 continue;
61 }
62
63 if self.lex_identifier_or_keyword(state) {
64 continue;
65 }
66
67 if self.lex_operators(state) {
68 continue;
69 }
70
71 if self.lex_single_char_tokens(state) {
72 continue;
73 }
74
75 if let Some(ch) = state.peek() {
77 state.advance(ch.len_utf8());
78 state.add_token(AdaTokenType::Error, safe_point, state.get_position());
79 }
80 }
81
82 Ok(())
83 }
84
85 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
87 ADA_WHITESPACE.scan(state, AdaTokenType::Whitespace)
88 }
89
90 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 let start = state.get_position();
92
93 if state.consume_if_starts_with("--") {
95 while let Some(ch) = state.peek() {
96 if ch == '\n' || ch == '\r' {
97 break;
98 }
99 state.advance(ch.len_utf8());
100 }
101 state.add_token(AdaTokenType::Comment, start, state.get_position());
102 return true;
103 }
104 false
105 }
106
107 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
108 let start = state.get_position();
109
110 if state.peek() == Some('"') {
112 state.advance(1);
113 while let Some(ch) = state.peek() {
114 if ch == '"' {
115 state.advance(1); if state.peek() == Some('"') {
117 state.advance(1);
119 continue;
120 }
121 break;
122 }
123 state.advance(ch.len_utf8());
124 if ch == '\n' || ch == '\r' {
125 break;
126 }
127 }
128 state.add_token(AdaTokenType::StringLiteral, start, state.get_position());
129 return true;
130 }
131 false
132 }
133
134 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
135 let start = state.get_position();
136 if state.peek() != Some('\'') {
137 return false;
138 }
139
140 state.advance(1); if let Some(c) = state.peek() {
143 state.advance(c.len_utf8());
144 }
145 else {
146 state.set_position(start);
147 return false;
148 }
149
150 if state.peek() == Some('\'') {
151 state.advance(1);
152 state.add_token(AdaTokenType::CharacterLiteral, start, state.get_position());
153 return true;
154 }
155 state.set_position(start);
156 false
157 }
158
159 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
160 let start = state.get_position();
161
162 if let Some(ch) = state.peek() {
163 if ch.is_ascii_digit() {
164 state.advance(ch.len_utf8());
166 while let Some(ch) = state.peek() {
167 if ch.is_ascii_digit() || ch == '_' {
168 state.advance(ch.len_utf8());
169 }
170 else {
171 break;
172 }
173 }
174
175 if state.peek() == Some('.') {
177 state.advance(1);
178 while let Some(ch) = state.peek() {
179 if ch.is_ascii_digit() || ch == '_' {
180 state.advance(ch.len_utf8());
181 }
182 else {
183 break;
184 }
185 }
186 }
187
188 if let Some(ch) = state.peek() {
190 if ch == 'e' || ch == 'E' {
191 state.advance(1);
192 if let Some(sign) = state.peek() {
193 if sign == '+' || sign == '-' {
194 state.advance(1);
195 }
196 }
197 while let Some(ch) = state.peek() {
198 if ch.is_ascii_digit() {
199 state.advance(ch.len_utf8());
200 }
201 else {
202 break;
203 }
204 }
205 }
206 }
207
208 state.add_token(AdaTokenType::NumberLiteral, start, state.get_position());
209 return true;
210 }
211 }
212 false
213 }
214
215 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
216 let start = state.get_position();
217
218 if let Some(ch) = state.peek() {
219 if ch.is_ascii_alphabetic() || ch == '_' {
220 state.advance(ch.len_utf8());
221
222 while let Some(ch) = state.peek() {
223 if ch.is_ascii_alphanumeric() || ch == '_' {
224 state.advance(ch.len_utf8());
225 }
226 else {
227 break;
228 }
229 }
230
231 let end = state.get_position();
232 let text = state.get_text_in((start..end).into());
233 let kind = match text.to_lowercase().as_str() {
234 "abort" => AdaTokenType::Abort,
235 "abs" => AdaTokenType::Abs,
236 "abstract" => AdaTokenType::Abstract,
237 "accept" => AdaTokenType::Accept,
238 "access" => AdaTokenType::Access,
239 "aliased" => AdaTokenType::Aliased,
240 "all" => AdaTokenType::All,
241 "and" => AdaTokenType::And,
242 "array" => AdaTokenType::Array,
243 "at" => AdaTokenType::At,
244 "begin" => AdaTokenType::Begin,
245 "body" => AdaTokenType::Body,
246 "case" => AdaTokenType::Case,
247 "constant" => AdaTokenType::Constant,
248 "declare" => AdaTokenType::Declare,
249 "delay" => AdaTokenType::Delay,
250 "delta" => AdaTokenType::Delta,
251 "digits" => AdaTokenType::Digits,
252 "do" => AdaTokenType::Do,
253 "else" => AdaTokenType::Else,
254 "elsif" => AdaTokenType::Elsif,
255 "end" => AdaTokenType::End,
256 "entry" => AdaTokenType::Entry,
257 "exception" => AdaTokenType::Exception,
258 "exit" => AdaTokenType::Exit,
259 "for" => AdaTokenType::For,
260 "function" => AdaTokenType::Function,
261 "generic" => AdaTokenType::Generic,
262 "goto" => AdaTokenType::Goto,
263 "if" => AdaTokenType::If,
264 "in" => AdaTokenType::In,
265 "interface" => AdaTokenType::Interface,
266 "is" => AdaTokenType::Is,
267 "limited" => AdaTokenType::Limited,
268 "loop" => AdaTokenType::Loop,
269 "mod" => AdaTokenType::Mod,
270 "new" => AdaTokenType::New,
271 "not" => AdaTokenType::Not,
272 "null" => AdaTokenType::Null,
273 "of" => AdaTokenType::Of,
274 "or" => AdaTokenType::Or,
275 "others" => AdaTokenType::Others,
276 "out" => AdaTokenType::Out,
277 "overriding" => AdaTokenType::Overriding,
278 "package" => AdaTokenType::Package,
279 "pragma" => AdaTokenType::Pragma,
280 "private" => AdaTokenType::Private,
281 "procedure" => AdaTokenType::Procedure,
282 "protected" => AdaTokenType::Protected,
283 "raise" => AdaTokenType::Raise,
284 "range" => AdaTokenType::Range,
285 "record" => AdaTokenType::Record,
286 "rem" => AdaTokenType::Rem,
287 "renames" => AdaTokenType::Renames,
288 "requeue" => AdaTokenType::Requeue,
289 "return" => AdaTokenType::Return,
290 "reverse" => AdaTokenType::Reverse,
291 "select" => AdaTokenType::Select,
292 "separate" => AdaTokenType::Separate,
293 "some" => AdaTokenType::Some,
294 "subtype" => AdaTokenType::Subtype,
295 "synchronized" => AdaTokenType::Synchronized,
296 "tagged" => AdaTokenType::Tagged,
297 "task" => AdaTokenType::Task,
298 "terminate" => AdaTokenType::Terminate,
299 "then" => AdaTokenType::Then,
300 "type" => AdaTokenType::Type,
301 "until" => AdaTokenType::Until,
302 "use" => AdaTokenType::Use,
303 "when" => AdaTokenType::When,
304 "while" => AdaTokenType::While,
305 "with" => AdaTokenType::With,
306 "xor" => AdaTokenType::Xor,
307 _ => AdaTokenType::Identifier,
308 };
309
310 state.add_token(kind, start, end);
311 return true;
312 }
313 }
314 false
315 }
316
317 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
318 let start = state.get_position();
319
320 if state.consume_if_starts_with("**") {
322 state.add_token(AdaTokenType::StarStar, start, state.get_position());
323 return true;
324 }
325 if state.consume_if_starts_with("=>") {
326 state.add_token(AdaTokenType::Arrow, start, state.get_position());
327 return true;
328 }
329 if state.consume_if_starts_with("<=") {
330 state.add_token(AdaTokenType::Le, start, state.get_position());
331 return true;
332 }
333 if state.consume_if_starts_with(">=") {
334 state.add_token(AdaTokenType::Ge, start, state.get_position());
335 return true;
336 }
337 if state.consume_if_starts_with(":=") {
338 state.add_token(AdaTokenType::ColonEq, start, state.get_position());
339 return true;
340 }
341 if state.consume_if_starts_with("..") {
342 state.add_token(AdaTokenType::DotDot, start, state.get_position());
343 return true;
344 }
345 if state.consume_if_starts_with("/=") {
346 state.add_token(AdaTokenType::Ne, start, state.get_position());
347 return true;
348 }
349 if state.consume_if_starts_with("<<") {
350 state.add_token(AdaTokenType::LtLt, start, state.get_position());
351 return true;
352 }
353 if state.consume_if_starts_with(">>") {
354 state.add_token(AdaTokenType::GtGt, start, state.get_position());
355 return true;
356 }
357 if state.consume_if_starts_with("<>") {
358 state.add_token(AdaTokenType::Box, start, state.get_position());
359 return true;
360 }
361
362 if let Some(ch) = state.peek() {
364 let kind = match ch {
365 '+' => AdaTokenType::Plus,
366 '-' => AdaTokenType::Minus,
367 '*' => AdaTokenType::Star,
368 '/' => AdaTokenType::Slash,
369 '=' => AdaTokenType::Eq,
370 '<' => AdaTokenType::Lt,
371 '>' => AdaTokenType::Gt,
372 '&' => AdaTokenType::Ampersand,
373 '|' => AdaTokenType::Pipe,
374 _ => return false,
375 };
376 state.advance(1);
377 state.add_token(kind, start, state.get_position());
378 return true;
379 }
380 false
381 }
382
383 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
384 let start = state.get_position();
385
386 if let Some(ch) = state.peek() {
387 let kind = match ch {
388 '(' => AdaTokenType::LeftParen,
389 ')' => AdaTokenType::RightParen,
390 '[' => AdaTokenType::LeftBracket,
391 ']' => AdaTokenType::RightBracket,
392 '{' => AdaTokenType::LeftBrace,
393 '}' => AdaTokenType::RightBrace,
394 ',' => AdaTokenType::Comma,
395 ';' => AdaTokenType::Semicolon,
396 ':' => AdaTokenType::Colon,
397 '.' => AdaTokenType::Dot,
398 '\'' => AdaTokenType::Apostrophe,
399 _ => return false,
400 };
401 state.advance(1);
402 state.add_token(kind, start, state.get_position());
403 return true;
404 }
405 false
406 }
407}