1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4pub use token_type::AdaTokenType;
5
6use crate::language::AdaLanguage;
7use oak_core::{
8 Lexer, LexerCache, LexerState, OakError,
9 lexer::{LexOutput, WhitespaceConfig},
10 source::Source,
11};
12use std::sync::LazyLock;
13
/// Shorthand for the `oak_core` lexer state specialised to [`AdaLanguage`];
/// `S` is the source type being lexed.
type State<'a, S> = LexerState<'a, S, AdaLanguage>;
15
/// Whitespace configuration used by `AdaLexer::skip_whitespace`, built lazily on first access.
///
/// `unicode_whitespace` is enabled. NOTE(review): presumably this makes the scan accept
/// Unicode whitespace in addition to ASCII — confirm against `WhitespaceConfig`.
static ADA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
17
/// Lexer that splits Ada source text into [`AdaTokenType`] tokens.
///
/// Construct it with [`AdaLexer::new`] and drive it through the `Lexer<AdaLanguage>`
/// trait implementation.
#[derive(Clone, Debug)]
pub struct AdaLexer<'config> {
    /// Borrowed language configuration supplied at construction.
    /// NOTE(review): no method visible in this file reads this field.
    config: &'config AdaLanguage,
}
22
23impl<'config> Lexer<AdaLanguage> for AdaLexer<'config> {
24 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<AdaLanguage>) -> LexOutput<AdaLanguage> {
25 let mut state: State<'_, S> = LexerState::new_with_cache(source, 0, cache);
26 let result = self.run(&mut state);
27 if result.is_ok() {
28 state.add_eof()
29 }
30 state.finish_with_cache(result, cache)
31 }
32}
33
34impl<'config> AdaLexer<'config> {
    /// Creates a lexer that borrows `config` for the lifetime `'config`.
    pub fn new(config: &'config AdaLanguage) -> Self {
        Self { config }
    }
38
39 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_char_literal(state) {
57 continue;
58 }
59
60 if self.lex_number_literal(state) {
61 continue;
62 }
63
64 if self.lex_identifier_or_keyword(state) {
65 continue;
66 }
67
68 if self.lex_operators(state) {
69 continue;
70 }
71
72 if self.lex_single_char_tokens(state) {
73 continue;
74 }
75
76 if let Some(ch) = state.peek() {
78 state.advance(ch.len_utf8());
79 state.add_token(AdaTokenType::Error, safe_point, state.get_position())
80 }
81 }
82
83 Ok(())
84 }
85
    /// Attempts to lex whitespace at the current position.
    ///
    /// Delegates to `ADA_WHITESPACE.scan`, passing `AdaTokenType::Whitespace` as the token
    /// kind, and returns that call's result unchanged.
    /// NOTE(review): presumably `true` means whitespace was consumed and a token was
    /// recorded — confirm against `WhitespaceConfig::scan`.
    fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
        ADA_WHITESPACE.scan(state, AdaTokenType::Whitespace)
    }
90
91 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
92 let start = state.get_position();
93
94 if state.consume_if_starts_with("--") {
96 while let Some(ch) = state.peek() {
97 if ch == '\n' || ch == '\r' {
98 break;
99 }
100 state.advance(ch.len_utf8())
101 }
102 state.add_token(AdaTokenType::Comment, start, state.get_position());
103 return true;
104 }
105 false
106 }
107
108 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
109 let start = state.get_position();
110
111 if state.peek() == Some('"') {
113 state.advance(1);
114 while let Some(ch) = state.peek() {
115 if ch == '"' {
116 state.advance(1); if state.peek() == Some('"') {
118 state.advance(1);
120 continue;
121 }
122 break;
123 }
124 state.advance(ch.len_utf8());
125 if ch == '\n' || ch == '\r' {
126 break;
127 }
128 }
129 state.add_token(AdaTokenType::StringLiteral, start, state.get_position());
130 return true;
131 }
132 false
133 }
134
135 fn lex_char_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
136 let start = state.get_position();
137 if state.peek() != Some('\'') {
138 return false;
139 }
140
141 state.advance(1); if let Some(c) = state.peek() {
144 state.advance(c.len_utf8())
145 }
146 else {
147 state.set_position(start);
148 return false;
149 }
150
151 if state.peek() == Some('\'') {
152 state.advance(1);
153 state.add_token(AdaTokenType::CharacterLiteral, start, state.get_position());
154 return true;
155 }
156 state.set_position(start);
157 false
158 }
159
160 fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
161 let start = state.get_position();
162
163 if let Some(ch) = state.peek() {
164 if ch.is_ascii_digit() {
165 state.advance(ch.len_utf8());
167 while let Some(ch) = state.peek() {
168 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
169 }
170
171 if state.peek() == Some('.') {
173 state.advance(1);
174 while let Some(ch) = state.peek() {
175 if ch.is_ascii_digit() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
176 }
177 }
178
179 if let Some(ch) = state.peek() {
181 if ch == 'e' || ch == 'E' {
182 state.advance(1);
183 if let Some(sign) = state.peek() {
184 if sign == '+' || sign == '-' {
185 state.advance(1)
186 }
187 }
188 while let Some(ch) = state.peek() {
189 if ch.is_ascii_digit() { state.advance(ch.len_utf8()) } else { break }
190 }
191 }
192 }
193
194 state.add_token(AdaTokenType::NumberLiteral, start, state.get_position());
195 return true;
196 }
197 }
198 false
199 }
200
201 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
202 let start = state.get_position();
203
204 if let Some(ch) = state.peek() {
205 if ch.is_ascii_alphabetic() || ch == '_' {
206 state.advance(ch.len_utf8());
207
208 while let Some(ch) = state.peek() {
209 if ch.is_ascii_alphanumeric() || ch == '_' { state.advance(ch.len_utf8()) } else { break }
210 }
211
212 let end = state.get_position();
213 let text = state.get_text_in((start..end).into());
214 let kind = match text.to_lowercase().as_str() {
215 "abort" => AdaTokenType::Abort,
216 "abs" => AdaTokenType::Abs,
217 "abstract" => AdaTokenType::Abstract,
218 "accept" => AdaTokenType::Accept,
219 "access" => AdaTokenType::Access,
220 "aliased" => AdaTokenType::Aliased,
221 "all" => AdaTokenType::All,
222 "and" => AdaTokenType::And,
223 "array" => AdaTokenType::Array,
224 "at" => AdaTokenType::At,
225 "begin" => AdaTokenType::Begin,
226 "body" => AdaTokenType::Body,
227 "case" => AdaTokenType::Case,
228 "constant" => AdaTokenType::Constant,
229 "declare" => AdaTokenType::Declare,
230 "delay" => AdaTokenType::Delay,
231 "delta" => AdaTokenType::Delta,
232 "digits" => AdaTokenType::Digits,
233 "do" => AdaTokenType::Do,
234 "else" => AdaTokenType::Else,
235 "elsif" => AdaTokenType::Elsif,
236 "end" => AdaTokenType::End,
237 "entry" => AdaTokenType::Entry,
238 "exception" => AdaTokenType::Exception,
239 "exit" => AdaTokenType::Exit,
240 "for" => AdaTokenType::For,
241 "function" => AdaTokenType::Function,
242 "generic" => AdaTokenType::Generic,
243 "goto" => AdaTokenType::Goto,
244 "if" => AdaTokenType::If,
245 "in" => AdaTokenType::In,
246 "interface" => AdaTokenType::Interface,
247 "is" => AdaTokenType::Is,
248 "limited" => AdaTokenType::Limited,
249 "loop" => AdaTokenType::Loop,
250 "mod" => AdaTokenType::Mod,
251 "new" => AdaTokenType::New,
252 "not" => AdaTokenType::Not,
253 "null" => AdaTokenType::Null,
254 "of" => AdaTokenType::Of,
255 "or" => AdaTokenType::Or,
256 "others" => AdaTokenType::Others,
257 "out" => AdaTokenType::Out,
258 "overriding" => AdaTokenType::Overriding,
259 "package" => AdaTokenType::Package,
260 "pragma" => AdaTokenType::Pragma,
261 "private" => AdaTokenType::Private,
262 "procedure" => AdaTokenType::Procedure,
263 "protected" => AdaTokenType::Protected,
264 "raise" => AdaTokenType::Raise,
265 "range" => AdaTokenType::Range,
266 "record" => AdaTokenType::Record,
267 "rem" => AdaTokenType::Rem,
268 "renames" => AdaTokenType::Renames,
269 "requeue" => AdaTokenType::Requeue,
270 "return" => AdaTokenType::Return,
271 "reverse" => AdaTokenType::Reverse,
272 "select" => AdaTokenType::Select,
273 "separate" => AdaTokenType::Separate,
274 "some" => AdaTokenType::Some,
275 "subtype" => AdaTokenType::Subtype,
276 "synchronized" => AdaTokenType::Synchronized,
277 "tagged" => AdaTokenType::Tagged,
278 "task" => AdaTokenType::Task,
279 "terminate" => AdaTokenType::Terminate,
280 "then" => AdaTokenType::Then,
281 "type" => AdaTokenType::Type,
282 "until" => AdaTokenType::Until,
283 "use" => AdaTokenType::Use,
284 "when" => AdaTokenType::When,
285 "while" => AdaTokenType::While,
286 "with" => AdaTokenType::With,
287 "xor" => AdaTokenType::Xor,
288 _ => AdaTokenType::Identifier,
289 };
290
291 state.add_token(kind, start, end);
292 return true;
293 }
294 }
295 false
296 }
297
298 fn lex_operators<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
299 let start = state.get_position();
300
301 if state.consume_if_starts_with("**") {
303 state.add_token(AdaTokenType::StarStar, start, state.get_position());
304 return true;
305 }
306 if state.consume_if_starts_with("=>") {
307 state.add_token(AdaTokenType::Arrow, start, state.get_position());
308 return true;
309 }
310 if state.consume_if_starts_with("<=") {
311 state.add_token(AdaTokenType::Le, start, state.get_position());
312 return true;
313 }
314 if state.consume_if_starts_with(">=") {
315 state.add_token(AdaTokenType::Ge, start, state.get_position());
316 return true;
317 }
318 if state.consume_if_starts_with(":=") {
319 state.add_token(AdaTokenType::ColonEq, start, state.get_position());
320 return true;
321 }
322 if state.consume_if_starts_with("..") {
323 state.add_token(AdaTokenType::DotDot, start, state.get_position());
324 return true;
325 }
326 if state.consume_if_starts_with("/=") {
327 state.add_token(AdaTokenType::Ne, start, state.get_position());
328 return true;
329 }
330 if state.consume_if_starts_with("<<") {
331 state.add_token(AdaTokenType::LtLt, start, state.get_position());
332 return true;
333 }
334 if state.consume_if_starts_with(">>") {
335 state.add_token(AdaTokenType::GtGt, start, state.get_position());
336 return true;
337 }
338 if state.consume_if_starts_with("<>") {
339 state.add_token(AdaTokenType::Box, start, state.get_position());
340 return true;
341 }
342
343 if let Some(ch) = state.peek() {
345 let kind = match ch {
346 '+' => AdaTokenType::Plus,
347 '-' => AdaTokenType::Minus,
348 '*' => AdaTokenType::Star,
349 '/' => AdaTokenType::Slash,
350 '=' => AdaTokenType::Eq,
351 '<' => AdaTokenType::Lt,
352 '>' => AdaTokenType::Gt,
353 '&' => AdaTokenType::Ampersand,
354 '|' => AdaTokenType::Pipe,
355 _ => return false,
356 };
357 state.advance(1);
358 state.add_token(kind, start, state.get_position());
359 return true;
360 }
361 false
362 }
363
364 fn lex_single_char_tokens<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
365 let start = state.get_position();
366
367 if let Some(ch) = state.peek() {
368 let kind = match ch {
369 '(' => AdaTokenType::LeftParen,
370 ')' => AdaTokenType::RightParen,
371 '[' => AdaTokenType::LeftBracket,
372 ']' => AdaTokenType::RightBracket,
373 '{' => AdaTokenType::LeftBrace,
374 '}' => AdaTokenType::RightBrace,
375 ',' => AdaTokenType::Comma,
376 ';' => AdaTokenType::Semicolon,
377 ':' => AdaTokenType::Colon,
378 '.' => AdaTokenType::Dot,
379 '\'' => AdaTokenType::Apostrophe,
380 _ => return false,
381 };
382 state.advance(1);
383 state.add_token(kind, start, state.get_position());
384 return true;
385 }
386 false
387 }
388}