1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5pub use token_type::AdaTokenType;
6
7use crate::language::AdaLanguage;
8use oak_core::{
9 Lexer, LexerCache, LexerState, OakError,
10 lexer::{LexOutput, WhitespaceConfig},
11 source::Source,
12};
13use std::sync::LazyLock;
14
15pub(crate) type State<'a, S> = LexerState<'a, S, AdaLanguage>;
16
17static ADA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
18
19#[derive(Clone, Debug)]
21pub struct AdaLexer<'config> {
22 config: &'config AdaLanguage,
23}
24
25impl<'config> Lexer<AdaLanguage> for AdaLexer<'config> {
26 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<AdaLanguage>) -> LexOutput<AdaLanguage> {
27 let mut state: State<'_, S> = LexerState::new_with_cache(source, 0, cache);
28 let result = self.run(&mut state);
29 if result.is_ok() {
30 state.add_eof()
31 }
32 state.finish_with_cache(result, cache)
33 }
34}
35
36impl<'config> AdaLexer<'config> {
37 pub fn new(config: &'config AdaLanguage) -> Self {
39 Self { config }
40 }
41
42 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
44 while state.not_at_end() {
45 let safe_point = state.get_position();
46
47 if self.skip_whitespace(state) {
48 continue;
49 }
50
51 if self.skip_comment(state) {
52 continue;
53 }
54
55 if self.lex_string_literal(state) {
56 continue;
57 }
58
59 if self.lex_char_literal(state) {
60 continue;
61 }
62
63 if self.lex_number_literal(state) {
64 continue;
65 }
66
67 if self.lex_identifier_or_keyword(state) {
68 continue;
69 }
70
71 if self.lex_operators(state) {
72 continue;
73 }
74
75 if self.lex_single_char_tokens(state) {
76 continue;
77 }
78
79 if let Some(ch) = state.peek() {
81 state.advance(ch.len_utf8());
82 state.add_token(AdaTokenType::Error, safe_point, state.get_position())
83 }
84 }
85
86 Ok(())
87 }
88
89 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
91 ADA_WHITESPACE.scan(state, AdaTokenType::Whitespace)
92 }
93
94 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
95 let start = state.get_position();
96
97 if state.consume_if_starts_with("--") {
99 while let Some(ch) = state.peek() {
100 if ch == '\n' || ch == '\r' {
101 break;
102 }
103 state.advance(ch.len_utf8())
104 }
105 state.add_token(AdaTokenType::Comment, start, state.get_position());
106 return true;
107 }
108 false
109 }
110
111 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
112 let start = state.get_position();
113
114 if state.peek() == Some('"') {
116 state.advance(1);
117 while let Some(ch) = state.peek() {
118 if ch == '"' {
119 state.advance(1); if state.peek() == Some('"') {
121 state.advance(1);
123 continue;
124 }
125 break;
126 }
127 state.advance(ch.len_utf8());
128 if ch == '\n' || ch == '\r' {
129 break;
130 }
131 }
132 state.add_token(AdaTokenType::StringLiteral, start, state.get_position());
133 return true;
134 }
135 false
136 }
137
138 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
139 let start = state.get_position();
140 if state.peek() != Some('\'') {
141 return false;
142 }
143
144 state.advance(1); if let Some(c) = state.peek() {
147 state.advance(c.len_utf8())
148 }
149 else {
150 state.set_position(start);
151 return false;
152 }
153
154 if state.peek() == Some('\'') {
155 state.advance(1);
156 state.add_token(AdaTokenType::CharacterLiteral, start, state.get_position());
157 return true;
158 }
159 state.set_position(start);
160 false
161 }
162
163 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
164 let start = state.get_position();
165
166 if let Some(ch) = state.peek() {
167 if ch.is_ascii_digit() {
168 state.advance(ch.len_utf8());
170 while let Some(ch) = state.peek() {
171 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
172 }
173
174 if state.peek() == Some('.') {
176 state.advance(1);
177 while let Some(ch) = state.peek() {
178 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
179 }
180 }
181
182 if let Some(ch) = state.peek() {
184 if ch == 'e' || ch == 'E' {
185 state.advance(1);
186 if let Some(sign) = state.peek() {
187 if sign == '+' || sign == '-' {
188 state.advance(1)
189 }
190 }
191 while let Some(ch) = state.peek() {
192 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
193 }
194 }
195 }
196
197 state.add_token(AdaTokenType::NumberLiteral, start, state.get_position());
198 return true;
199 }
200 }
201 false
202 }
203
204 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
205 let start = state.get_position();
206
207 if let Some(ch) = state.peek() {
208 if ch.is_ascii_alphabetic() || ch == '_' {
209 state.advance(ch.len_utf8());
210
211 while let Some(ch) = state.peek() {
212 if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
213 }
214
215 let end = state.get_position();
216 let text = state.get_text_in((start..end).into());
217 let kind = match text.to_lowercase().as_str() {
218 "abort" => AdaTokenType::Abort,
219 "abs" => AdaTokenType::Abs,
220 "abstract" => AdaTokenType::Abstract,
221 "accept" => AdaTokenType::Accept,
222 "access" => AdaTokenType::Access,
223 "aliased" => AdaTokenType::Aliased,
224 "all" => AdaTokenType::All,
225 "and" => AdaTokenType::And,
226 "array" => AdaTokenType::Array,
227 "at" => AdaTokenType::At,
228 "begin" => AdaTokenType::Begin,
229 "body" => AdaTokenType::Body,
230 "case" => AdaTokenType::Case,
231 "constant" => AdaTokenType::Constant,
232 "declare" => AdaTokenType::Declare,
233 "delay" => AdaTokenType::Delay,
234 "delta" => AdaTokenType::Delta,
235 "digits" => AdaTokenType::Digits,
236 "do" => AdaTokenType::Do,
237 "else" => AdaTokenType::Else,
238 "elsif" => AdaTokenType::Elsif,
239 "end" => AdaTokenType::End,
240 "entry" => AdaTokenType::Entry,
241 "exception" => AdaTokenType::Exception,
242 "exit" => AdaTokenType::Exit,
243 "for" => AdaTokenType::For,
244 "function" => AdaTokenType::Function,
245 "generic" => AdaTokenType::Generic,
246 "goto" => AdaTokenType::Goto,
247 "if" => AdaTokenType::If,
248 "in" => AdaTokenType::In,
249 "interface" => AdaTokenType::Interface,
250 "is" => AdaTokenType::Is,
251 "limited" => AdaTokenType::Limited,
252 "loop" => AdaTokenType::Loop,
253 "mod" => AdaTokenType::Mod,
254 "new" => AdaTokenType::New,
255 "not" => AdaTokenType::Not,
256 "null" => AdaTokenType::Null,
257 "of" => AdaTokenType::Of,
258 "or" => AdaTokenType::Or,
259 "others" => AdaTokenType::Others,
260 "out" => AdaTokenType::Out,
261 "overriding" => AdaTokenType::Overriding,
262 "package" => AdaTokenType::Package,
263 "pragma" => AdaTokenType::Pragma,
264 "private" => AdaTokenType::Private,
265 "procedure" => AdaTokenType::Procedure,
266 "protected" => AdaTokenType::Protected,
267 "raise" => AdaTokenType::Raise,
268 "range" => AdaTokenType::Range,
269 "record" => AdaTokenType::Record,
270 "rem" => AdaTokenType::Rem,
271 "renames" => AdaTokenType::Renames,
272 "requeue" => AdaTokenType::Requeue,
273 "return" => AdaTokenType::Return,
274 "reverse" => AdaTokenType::Reverse,
275 "select" => AdaTokenType::Select,
276 "separate" => AdaTokenType::Separate,
277 "some" => AdaTokenType::Some,
278 "subtype" => AdaTokenType::Subtype,
279 "synchronized" => AdaTokenType::Synchronized,
280 "tagged" => AdaTokenType::Tagged,
281 "task" => AdaTokenType::Task,
282 "terminate" => AdaTokenType::Terminate,
283 "then" => AdaTokenType::Then,
284 "type" => AdaTokenType::Type,
285 "until" => AdaTokenType::Until,
286 "use" => AdaTokenType::Use,
287 "when" => AdaTokenType::When,
288 "while" => AdaTokenType::While,
289 "with" => AdaTokenType::With,
290 "xor" => AdaTokenType::Xor,
291 _ => AdaTokenType::Identifier,
292 };
293
294 state.add_token(kind, start, end);
295 return true;
296 }
297 }
298 false
299 }
300
301 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
302 let start = state.get_position();
303
304 if state.consume_if_starts_with("**") {
306 state.add_token(AdaTokenType::StarStar, start, state.get_position());
307 return true;
308 }
309 if state.consume_if_starts_with("=>") {
310 state.add_token(AdaTokenType::Arrow, start, state.get_position());
311 return true;
312 }
313 if state.consume_if_starts_with("<=") {
314 state.add_token(AdaTokenType::Le, start, state.get_position());
315 return true;
316 }
317 if state.consume_if_starts_with(">=") {
318 state.add_token(AdaTokenType::Ge, start, state.get_position());
319 return true;
320 }
321 if state.consume_if_starts_with(":=") {
322 state.add_token(AdaTokenType::ColonEq, start, state.get_position());
323 return true;
324 }
325 if state.consume_if_starts_with("..") {
326 state.add_token(AdaTokenType::DotDot, start, state.get_position());
327 return true;
328 }
329 if state.consume_if_starts_with("/=") {
330 state.add_token(AdaTokenType::Ne, start, state.get_position());
331 return true;
332 }
333 if state.consume_if_starts_with("<<") {
334 state.add_token(AdaTokenType::LtLt, start, state.get_position());
335 return true;
336 }
337 if state.consume_if_starts_with(">>") {
338 state.add_token(AdaTokenType::GtGt, start, state.get_position());
339 return true;
340 }
341 if state.consume_if_starts_with("<>") {
342 state.add_token(AdaTokenType::Box, start, state.get_position());
343 return true;
344 }
345
346 if let Some(ch) = state.peek() {
348 let kind = match ch {
349 '+' => AdaTokenType::Plus,
350 '-' => AdaTokenType::Minus,
351 '*' => AdaTokenType::Star,
352 '/' => AdaTokenType::Slash,
353 '=' => AdaTokenType::Eq,
354 '<' => AdaTokenType::Lt,
355 '>' => AdaTokenType::Gt,
356 '&' => AdaTokenType::Ampersand,
357 '|' => AdaTokenType::Pipe,
358 _ => return false,
359 };
360 state.advance(1);
361 state.add_token(kind, start, state.get_position());
362 return true;
363 }
364 false
365 }
366
367 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
368 let start = state.get_position();
369
370 if let Some(ch) = state.peek() {
371 let kind = match ch {
372 '(' => AdaTokenType::LeftParen,
373 ')' => AdaTokenType::RightParen,
374 '[' => AdaTokenType::LeftBracket,
375 ']' => AdaTokenType::RightBracket,
376 '{' => AdaTokenType::LeftBrace,
377 '}' => AdaTokenType::RightBrace,
378 ',' => AdaTokenType::Comma,
379 ';' => AdaTokenType::Semicolon,
380 ':' => AdaTokenType::Colon,
381 '.' => AdaTokenType::Dot,
382 '\'' => AdaTokenType::Apostrophe,
383 _ => return false,
384 };
385 state.advance(1);
386 state.add_token(kind, start, state.get_position());
387 return true;
388 }
389 false
390 }
391}