1pub mod token_type;
2
3pub use token_type::AdaTokenType;
4
5use crate::language::AdaLanguage;
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError,
8 lexer::{LexOutput, WhitespaceConfig},
9 source::Source,
10};
11use std::sync::LazyLock;
12
/// Shorthand for the `oak_core` lexer state specialised to [`AdaLanguage`];
/// `S` is the source type being scanned.
type State<'a, S> = LexerState<'a, S, AdaLanguage>;
14
15static ADA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16
/// Lexer that turns Ada source text into a stream of [`AdaTokenType`] tokens.
///
/// The type has no fields; the scanning state is created afresh inside each
/// `lex` call.
#[derive(Clone)]
pub struct AdaLexer;
19
20impl Lexer<AdaLanguage> for AdaLexer {
21 fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<AdaLanguage>) -> LexOutput<AdaLanguage> {
22 let mut state: State<'_, S> = LexerState::new(source);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof();
26 }
27 state.finish_with_cache(result, cache)
28 }
29}
30
31impl AdaLexer {
32 pub fn new(_config: &AdaLanguage) -> Self {
33 Self
34 }
35
36 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
38 while state.not_at_end() {
39 let safe_point = state.get_position();
40
41 if self.skip_whitespace(state) {
42 continue;
43 }
44
45 if self.skip_comment(state) {
46 continue;
47 }
48
49 if self.lex_string_literal(state) {
50 continue;
51 }
52
53 if self.lex_char_literal(state) {
54 continue;
55 }
56
57 if self.lex_number_literal(state) {
58 continue;
59 }
60
61 if self.lex_identifier_or_keyword(state) {
62 continue;
63 }
64
65 if self.lex_operators(state) {
66 continue;
67 }
68
69 if self.lex_single_char_tokens(state) {
70 continue;
71 }
72
73 if let Some(ch) = state.peek() {
75 state.advance(ch.len_utf8());
76 state.add_token(AdaTokenType::Error, safe_point, state.get_position());
77 }
78 }
79
80 Ok(())
81 }
82
83 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85 ADA_WHITESPACE.scan(state, AdaTokenType::Whitespace)
86 }
87
88 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
89 let start = state.get_position();
90
91 if state.consume_if_starts_with("--") {
93 while let Some(ch) = state.peek() {
94 if ch == '\n' || ch == '\r' {
95 break;
96 }
97 state.advance(ch.len_utf8());
98 }
99 state.add_token(AdaTokenType::Comment, start, state.get_position());
100 return true;
101 }
102 false
103 }
104
105 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
106 let start = state.get_position();
107
108 if state.peek() == Some('"') {
110 state.advance(1);
111 while let Some(ch) = state.peek() {
112 if ch == '"' {
113 state.advance(1); if state.peek() == Some('"') {
115 state.advance(1);
117 continue;
118 }
119 break;
120 }
121 state.advance(ch.len_utf8());
122 if ch == '\n' || ch == '\r' {
123 break;
124 }
125 }
126 state.add_token(AdaTokenType::StringLiteral, start, state.get_position());
127 return true;
128 }
129 false
130 }
131
132 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
133 let start = state.get_position();
134 if state.peek() != Some('\'') {
135 return false;
136 }
137
138 state.advance(1); if let Some(c) = state.peek() {
141 state.advance(c.len_utf8());
142 }
143 else {
144 state.set_position(start);
145 return false;
146 }
147
148 if state.peek() == Some('\'') {
149 state.advance(1);
150 state.add_token(AdaTokenType::CharacterLiteral, start, state.get_position());
151 return true;
152 }
153 state.set_position(start);
154 false
155 }
156
157 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
158 let start = state.get_position();
159
160 if let Some(ch) = state.peek() {
161 if ch.is_ascii_digit() {
162 state.advance(ch.len_utf8());
164 while let Some(ch) = state.peek() {
165 if ch.is_ascii_digit() || ch == '_' {
166 state.advance(ch.len_utf8());
167 }
168 else {
169 break;
170 }
171 }
172
173 if state.peek() == Some('.') {
175 state.advance(1);
176 while let Some(ch) = state.peek() {
177 if ch.is_ascii_digit() || ch == '_' {
178 state.advance(ch.len_utf8());
179 }
180 else {
181 break;
182 }
183 }
184 }
185
186 if let Some(ch) = state.peek() {
188 if ch == 'e' || ch == 'E' {
189 state.advance(1);
190 if let Some(sign) = state.peek() {
191 if sign == '+' || sign == '-' {
192 state.advance(1);
193 }
194 }
195 while let Some(ch) = state.peek() {
196 if ch.is_ascii_digit() {
197 state.advance(ch.len_utf8());
198 }
199 else {
200 break;
201 }
202 }
203 }
204 }
205
206 state.add_token(AdaTokenType::NumberLiteral, start, state.get_position());
207 return true;
208 }
209 }
210 false
211 }
212
213 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
214 let start = state.get_position();
215
216 if let Some(ch) = state.peek() {
217 if ch.is_ascii_alphabetic() || ch == '_' {
218 state.advance(ch.len_utf8());
219
220 while let Some(ch) = state.peek() {
221 if ch.is_ascii_alphanumeric() || ch == '_' {
222 state.advance(ch.len_utf8());
223 }
224 else {
225 break;
226 }
227 }
228
229 let end = state.get_position();
230 let text = state.get_text_in((start..end).into());
231 let kind = match text.to_lowercase().as_str() {
232 "abort" => AdaTokenType::Abort,
233 "abs" => AdaTokenType::Abs,
234 "abstract" => AdaTokenType::Abstract,
235 "accept" => AdaTokenType::Accept,
236 "access" => AdaTokenType::Access,
237 "aliased" => AdaTokenType::Aliased,
238 "all" => AdaTokenType::All,
239 "and" => AdaTokenType::And,
240 "array" => AdaTokenType::Array,
241 "at" => AdaTokenType::At,
242 "begin" => AdaTokenType::Begin,
243 "body" => AdaTokenType::Body,
244 "case" => AdaTokenType::Case,
245 "constant" => AdaTokenType::Constant,
246 "declare" => AdaTokenType::Declare,
247 "delay" => AdaTokenType::Delay,
248 "delta" => AdaTokenType::Delta,
249 "digits" => AdaTokenType::Digits,
250 "do" => AdaTokenType::Do,
251 "else" => AdaTokenType::Else,
252 "elsif" => AdaTokenType::Elsif,
253 "end" => AdaTokenType::End,
254 "entry" => AdaTokenType::Entry,
255 "exception" => AdaTokenType::Exception,
256 "exit" => AdaTokenType::Exit,
257 "for" => AdaTokenType::For,
258 "function" => AdaTokenType::Function,
259 "generic" => AdaTokenType::Generic,
260 "goto" => AdaTokenType::Goto,
261 "if" => AdaTokenType::If,
262 "in" => AdaTokenType::In,
263 "interface" => AdaTokenType::Interface,
264 "is" => AdaTokenType::Is,
265 "limited" => AdaTokenType::Limited,
266 "loop" => AdaTokenType::Loop,
267 "mod" => AdaTokenType::Mod,
268 "new" => AdaTokenType::New,
269 "not" => AdaTokenType::Not,
270 "null" => AdaTokenType::Null,
271 "of" => AdaTokenType::Of,
272 "or" => AdaTokenType::Or,
273 "others" => AdaTokenType::Others,
274 "out" => AdaTokenType::Out,
275 "overriding" => AdaTokenType::Overriding,
276 "package" => AdaTokenType::Package,
277 "pragma" => AdaTokenType::Pragma,
278 "private" => AdaTokenType::Private,
279 "procedure" => AdaTokenType::Procedure,
280 "protected" => AdaTokenType::Protected,
281 "raise" => AdaTokenType::Raise,
282 "range" => AdaTokenType::Range,
283 "record" => AdaTokenType::Record,
284 "rem" => AdaTokenType::Rem,
285 "renames" => AdaTokenType::Renames,
286 "requeue" => AdaTokenType::Requeue,
287 "return" => AdaTokenType::Return,
288 "reverse" => AdaTokenType::Reverse,
289 "select" => AdaTokenType::Select,
290 "separate" => AdaTokenType::Separate,
291 "some" => AdaTokenType::Some,
292 "subtype" => AdaTokenType::Subtype,
293 "synchronized" => AdaTokenType::Synchronized,
294 "tagged" => AdaTokenType::Tagged,
295 "task" => AdaTokenType::Task,
296 "terminate" => AdaTokenType::Terminate,
297 "then" => AdaTokenType::Then,
298 "type" => AdaTokenType::Type,
299 "until" => AdaTokenType::Until,
300 "use" => AdaTokenType::Use,
301 "when" => AdaTokenType::When,
302 "while" => AdaTokenType::While,
303 "with" => AdaTokenType::With,
304 "xor" => AdaTokenType::Xor,
305 _ => AdaTokenType::Identifier,
306 };
307
308 state.add_token(kind, start, end);
309 return true;
310 }
311 }
312 false
313 }
314
315 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
316 let start = state.get_position();
317
318 if state.consume_if_starts_with("**") {
320 state.add_token(AdaTokenType::StarStar, start, state.get_position());
321 return true;
322 }
323 if state.consume_if_starts_with("=>") {
324 state.add_token(AdaTokenType::Arrow, start, state.get_position());
325 return true;
326 }
327 if state.consume_if_starts_with("<=") {
328 state.add_token(AdaTokenType::Le, start, state.get_position());
329 return true;
330 }
331 if state.consume_if_starts_with(">=") {
332 state.add_token(AdaTokenType::Ge, start, state.get_position());
333 return true;
334 }
335 if state.consume_if_starts_with(":=") {
336 state.add_token(AdaTokenType::ColonEq, start, state.get_position());
337 return true;
338 }
339 if state.consume_if_starts_with("..") {
340 state.add_token(AdaTokenType::DotDot, start, state.get_position());
341 return true;
342 }
343 if state.consume_if_starts_with("/=") {
344 state.add_token(AdaTokenType::Ne, start, state.get_position());
345 return true;
346 }
347 if state.consume_if_starts_with("<<") {
348 state.add_token(AdaTokenType::LtLt, start, state.get_position());
349 return true;
350 }
351 if state.consume_if_starts_with(">>") {
352 state.add_token(AdaTokenType::GtGt, start, state.get_position());
353 return true;
354 }
355 if state.consume_if_starts_with("<>") {
356 state.add_token(AdaTokenType::Box, start, state.get_position());
357 return true;
358 }
359
360 if let Some(ch) = state.peek() {
362 let kind = match ch {
363 '+' => AdaTokenType::Plus,
364 '-' => AdaTokenType::Minus,
365 '*' => AdaTokenType::Star,
366 '/' => AdaTokenType::Slash,
367 '=' => AdaTokenType::Eq,
368 '<' => AdaTokenType::Lt,
369 '>' => AdaTokenType::Gt,
370 '&' => AdaTokenType::Ampersand,
371 '|' => AdaTokenType::Pipe,
372 _ => return false,
373 };
374 state.advance(1);
375 state.add_token(kind, start, state.get_position());
376 return true;
377 }
378 false
379 }
380
381 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
382 let start = state.get_position();
383
384 if let Some(ch) = state.peek() {
385 let kind = match ch {
386 '(' => AdaTokenType::LeftParen,
387 ')' => AdaTokenType::RightParen,
388 '[' => AdaTokenType::LeftBracket,
389 ']' => AdaTokenType::RightBracket,
390 '{' => AdaTokenType::LeftBrace,
391 '}' => AdaTokenType::RightBrace,
392 ',' => AdaTokenType::Comma,
393 ';' => AdaTokenType::Semicolon,
394 ':' => AdaTokenType::Colon,
395 '.' => AdaTokenType::Dot,
396 '\'' => AdaTokenType::Apostrophe,
397 _ => return false,
398 };
399 state.advance(1);
400 state.add_token(kind, start, state.get_position());
401 return true;
402 }
403 false
404 }
405}