1use crate::{kind::AdaSyntaxKind, language::AdaLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Shorthand for the `oak_core` lexer state specialised to the Ada language;
/// `S` is the source type being lexed.
type State<S> = LexerState<S, AdaLanguage>;
10
/// Whitespace scanner configuration with `unicode_whitespace` enabled; used by
/// `AdaLexer::skip_whitespace`.
static ADA_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Line-comment configuration with `--` as the only marker.
// NOTE(review): not referenced by the lexer methods in this part of the file
// (`skip_comment` matches `--` by hand) — confirm whether it is used elsewhere.
static ADA_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["--"] });
/// String-literal configuration: `"` as the quote character and no escape character.
// NOTE(review): not referenced by the lexer methods in this part of the file — confirm usage.
static ADA_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: None });
/// Character-literal configuration: `'` as the quote character and no escape character.
// NOTE(review): not referenced by the lexer methods in this part of the file — confirm usage.
static ADA_CHAR: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['\''], escape: None });
15
/// Lexer for Ada source text, implementing `Lexer<AdaLanguage>`.
///
/// Holds only a borrowed language configuration, so cloning is cheap.
#[derive(Clone)]
pub struct AdaLexer<'config> {
    /// Language configuration borrowed for the lifetime of the lexer.
    config: &'config AdaLanguage,
}
20
21impl<'config> Lexer<AdaLanguage> for AdaLexer<'config> {
22 fn lex_incremental(
23 &self,
24 source: impl Source,
25 changed: usize,
26 cache: IncrementalCache<AdaLanguage>,
27 ) -> LexOutput<AdaLanguage> {
28 let mut state = LexerState::new_with_cache(source, changed, cache);
29 let result = self.run(&mut state);
30 state.finish(result)
31 }
32}
33
34impl<'config> AdaLexer<'config> {
35 pub fn new(config: &'config AdaLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_char_literal(state) {
57 continue;
58 }
59
60 if self.lex_number_literal(state) {
61 continue;
62 }
63
64 if self.lex_identifier_or_keyword(state) {
65 continue;
66 }
67
68 if self.lex_operators(state) {
69 continue;
70 }
71
72 if self.lex_single_char_tokens(state) {
73 continue;
74 }
75
76 if let Some(ch) = state.peek() {
78 state.advance(ch.len_utf8());
79 state.add_token(AdaSyntaxKind::Error, safe_point, state.get_position());
80 }
81 }
82
83 let eof_pos = state.get_position();
85 state.add_token(AdaSyntaxKind::Eof, eof_pos, eof_pos);
86 Ok(())
87 }
88
89 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
91 match ADA_WHITESPACE.scan(state.rest(), state.get_position(), AdaSyntaxKind::Whitespace) {
92 Some(token) => {
93 state.advance_with(token);
94 return true;
95 }
96 None => {}
97 }
98 false
99 }
100
101 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
102 let start = state.get_position();
103 let rest = state.rest();
104
105 if rest.starts_with("--") {
107 state.advance(2);
108 while let Some(ch) = state.peek() {
109 if ch == '\n' || ch == '\r' {
110 break;
111 }
112 state.advance(ch.len_utf8());
113 }
114 state.add_token(AdaSyntaxKind::Comment, start, state.get_position());
115 return true;
116 }
117 false
118 }
119
120 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
121 let start = state.get_position();
122
123 if state.current() == Some('"') {
125 state.advance(1);
126 while let Some(ch) = state.peek() {
127 if ch == '"' {
128 state.advance(1); break;
130 }
131 state.advance(ch.len_utf8());
132 if ch == '\n' || ch == '\r' {
133 break;
134 }
135 }
136 state.add_token(AdaSyntaxKind::StringLiteral, start, state.get_position());
137 return true;
138 }
139 false
140 }
141
142 fn lex_char_literal<S: Source>(&self, state: &mut State<S>) -> bool {
143 let start = state.get_position();
144 if state.current() != Some('\'') {
145 return false;
146 }
147
148 state.advance(1); if let Some(c) = state.peek() {
151 state.advance(c.len_utf8());
152 }
153 else {
154 state.set_position(start);
155 return false;
156 }
157
158 if state.peek() == Some('\'') {
159 state.advance(1);
160 state.add_token(AdaSyntaxKind::CharacterLiteral, start, state.get_position());
161 return true;
162 }
163 state.set_position(start);
164 false
165 }
166
167 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
168 let start = state.get_position();
169
170 if let Some(ch) = state.current() {
171 if ch.is_ascii_digit() {
172 state.advance(ch.len_utf8());
174 while let Some(ch) = state.current() {
175 if ch.is_ascii_digit() || ch == '_' {
176 state.advance(ch.len_utf8());
177 }
178 else {
179 break;
180 }
181 }
182
183 if state.current() == Some('.') {
185 state.advance(1);
186 while let Some(ch) = state.current() {
187 if ch.is_ascii_digit() || ch == '_' {
188 state.advance(ch.len_utf8());
189 }
190 else {
191 break;
192 }
193 }
194 }
195
196 if let Some(ch) = state.current() {
198 if ch == 'e' || ch == 'E' {
199 state.advance(1);
200 if let Some(sign) = state.current() {
201 if sign == '+' || sign == '-' {
202 state.advance(1);
203 }
204 }
205 while let Some(ch) = state.current() {
206 if ch.is_ascii_digit() {
207 state.advance(ch.len_utf8());
208 }
209 else {
210 break;
211 }
212 }
213 }
214 }
215
216 state.add_token(AdaSyntaxKind::NumberLiteral, start, state.get_position());
217 return true;
218 }
219 }
220 false
221 }
222
223 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
224 let start = state.get_position();
225
226 if let Some(ch) = state.current() {
227 if ch.is_ascii_alphabetic() || ch == '_' {
228 state.advance(ch.len_utf8());
229
230 while let Some(ch) = state.current() {
231 if ch.is_ascii_alphanumeric() || ch == '_' {
232 state.advance(ch.len_utf8());
233 }
234 else {
235 break;
236 }
237 }
238
239 let text = state.get_text_in((start..state.get_position()).into());
240 let kind = match text.to_lowercase().as_str() {
241 "abort" => AdaSyntaxKind::AbortKeyword,
242 "abs" => AdaSyntaxKind::AbsKeyword,
243 "abstract" => AdaSyntaxKind::AbstractKeyword,
244 "accept" => AdaSyntaxKind::AcceptKeyword,
245 "access" => AdaSyntaxKind::AccessKeyword,
246 "aliased" => AdaSyntaxKind::AliasedKeyword,
247 "all" => AdaSyntaxKind::AllKeyword,
248 "and" => AdaSyntaxKind::AndKeyword,
249 "array" => AdaSyntaxKind::ArrayKeyword,
250 "at" => AdaSyntaxKind::AtKeyword,
251 "begin" => AdaSyntaxKind::BeginKeyword,
252 "body" => AdaSyntaxKind::BodyKeyword,
253 "case" => AdaSyntaxKind::CaseKeyword,
254 "constant" => AdaSyntaxKind::ConstantKeyword,
255 "declare" => AdaSyntaxKind::DeclareKeyword,
256 "delay" => AdaSyntaxKind::DelayKeyword,
257 "delta" => AdaSyntaxKind::DeltaKeyword,
258 "digits" => AdaSyntaxKind::DigitsKeyword,
259 "do" => AdaSyntaxKind::DoKeyword,
260 "else" => AdaSyntaxKind::ElseKeyword,
261 "elsif" => AdaSyntaxKind::ElsifKeyword,
262 "end" => AdaSyntaxKind::EndKeyword,
263 "entry" => AdaSyntaxKind::EntryKeyword,
264 "exception" => AdaSyntaxKind::ExceptionKeyword,
265 "exit" => AdaSyntaxKind::ExitKeyword,
266 "for" => AdaSyntaxKind::ForKeyword,
267 "function" => AdaSyntaxKind::FunctionKeyword,
268 "generic" => AdaSyntaxKind::GenericKeyword,
269 "goto" => AdaSyntaxKind::GotoKeyword,
270 "if" => AdaSyntaxKind::IfKeyword,
271 "in" => AdaSyntaxKind::InKeyword,
272 "interface" => AdaSyntaxKind::InterfaceKeyword,
273 "is" => AdaSyntaxKind::IsKeyword,
274 "limited" => AdaSyntaxKind::LimitedKeyword,
275 "loop" => AdaSyntaxKind::LoopKeyword,
276 "mod" => AdaSyntaxKind::ModKeyword,
277 "new" => AdaSyntaxKind::NewKeyword,
278 "not" => AdaSyntaxKind::NotKeyword,
279 "null" => AdaSyntaxKind::NullKeyword,
280 "of" => AdaSyntaxKind::OfKeyword,
281 "or" => AdaSyntaxKind::OrKeyword,
282 "others" => AdaSyntaxKind::OthersKeyword,
283 "out" => AdaSyntaxKind::OutKeyword,
284 "overriding" => AdaSyntaxKind::OverridingKeyword,
285 "package" => AdaSyntaxKind::PackageKeyword,
286 "pragma" => AdaSyntaxKind::PragmaKeyword,
287 "private" => AdaSyntaxKind::PrivateKeyword,
288 "procedure" => AdaSyntaxKind::ProcedureKeyword,
289 "protected" => AdaSyntaxKind::ProtectedKeyword,
290 "raise" => AdaSyntaxKind::RaiseKeyword,
291 "range" => AdaSyntaxKind::RangeKeyword,
292 "record" => AdaSyntaxKind::RecordKeyword,
293 "rem" => AdaSyntaxKind::RemKeyword,
294 "renames" => AdaSyntaxKind::RenamesKeyword,
295 "requeue" => AdaSyntaxKind::RequeueKeyword,
296 "return" => AdaSyntaxKind::ReturnKeyword,
297 "reverse" => AdaSyntaxKind::ReverseKeyword,
298 "select" => AdaSyntaxKind::SelectKeyword,
299 "separate" => AdaSyntaxKind::SeparateKeyword,
300 "subtype" => AdaSyntaxKind::SubtypeKeyword,
301 "synchronized" => AdaSyntaxKind::SynchronizedKeyword,
302 "tagged" => AdaSyntaxKind::TaggedKeyword,
303 "task" => AdaSyntaxKind::TaskKeyword,
304 "terminate" => AdaSyntaxKind::TerminateKeyword,
305 "then" => AdaSyntaxKind::ThenKeyword,
306 "type" => AdaSyntaxKind::TypeKeyword,
307 "until" => AdaSyntaxKind::UntilKeyword,
308 "use" => AdaSyntaxKind::UseKeyword,
309 "when" => AdaSyntaxKind::WhenKeyword,
310 "while" => AdaSyntaxKind::WhileKeyword,
311 "with" => AdaSyntaxKind::WithKeyword,
312 "xor" => AdaSyntaxKind::XorKeyword,
313 _ => AdaSyntaxKind::Identifier,
314 };
315
316 state.add_token(kind, start, state.get_position());
317 return true;
318 }
319 }
320 false
321 }
322
323 fn lex_operators<S: Source>(&self, state: &mut State<S>) -> bool {
324 let start = state.get_position();
325 let rest = state.rest();
326
327 if rest.starts_with("**") {
329 state.advance(2);
330 state.add_token(AdaSyntaxKind::DoubleStar, start, state.get_position());
331 return true;
332 }
333 if rest.starts_with("=>") {
334 state.advance(2);
335 state.add_token(AdaSyntaxKind::Arrow, start, state.get_position());
336 return true;
337 }
338 if rest.starts_with("<=") {
339 state.advance(2);
340 state.add_token(AdaSyntaxKind::LessEqual, start, state.get_position());
341 return true;
342 }
343 if rest.starts_with(">=") {
344 state.advance(2);
345 state.add_token(AdaSyntaxKind::GreaterEqual, start, state.get_position());
346 return true;
347 }
348 if rest.starts_with(":=") {
349 state.advance(2);
350 state.add_token(AdaSyntaxKind::ColonEqual, start, state.get_position());
351 return true;
352 }
353 if rest.starts_with("..") {
354 state.advance(2);
355 state.add_token(AdaSyntaxKind::DotDot, start, state.get_position());
356 return true;
357 }
358 if rest.starts_with("/=") {
359 state.advance(2);
360 state.add_token(AdaSyntaxKind::NotEqual, start, state.get_position());
361 return true;
362 }
363
364 if let Some(ch) = state.current() {
366 let kind = match ch {
367 '+' => AdaSyntaxKind::Plus,
368 '-' => AdaSyntaxKind::Minus,
369 '*' => AdaSyntaxKind::Star,
370 '/' => AdaSyntaxKind::Slash,
371 '=' => AdaSyntaxKind::Equal,
372 '<' => AdaSyntaxKind::Less,
373 '>' => AdaSyntaxKind::Greater,
374 '&' => AdaSyntaxKind::Ampersand,
375 '|' => AdaSyntaxKind::Pipe,
376 _ => return false,
377 };
378 state.advance(1);
379 state.add_token(kind, start, state.get_position());
380 return true;
381 }
382 false
383 }
384
385 fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
386 let start = state.get_position();
387
388 if let Some(ch) = state.current() {
389 let kind = match ch {
390 '(' => AdaSyntaxKind::LeftParen,
391 ')' => AdaSyntaxKind::RightParen,
392 '[' => AdaSyntaxKind::LeftBracket,
393 ']' => AdaSyntaxKind::RightBracket,
394 '{' => AdaSyntaxKind::LeftBrace,
395 '}' => AdaSyntaxKind::RightBrace,
396 ',' => AdaSyntaxKind::Comma,
397 ';' => AdaSyntaxKind::Semicolon,
398 ':' => AdaSyntaxKind::Colon,
399 '.' => AdaSyntaxKind::Dot,
400 _ => return false,
401 };
402 state.advance(1);
403 state.add_token(kind, start, state.get_position());
404 return true;
405 }
406 false
407 }
408}