1use crate::{kind::PascalSyntaxKind, language::PascalLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
/// Lexer state specialised to [`PascalLanguage`]; `'s` and `S` are forwarded unchanged to
/// [`LexerState`].
type State<'s, S> = LexerState<'s, S, PascalLanguage>;

/// Whitespace-scanning configuration, built lazily on first use, with Unicode whitespace enabled.
static PASCAL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
/// Comment-scanning configuration, built lazily on first use: `//` as the line marker and
/// non-nesting `{` ... `}` as the block delimiters. `(* ... *)` comments are not described here.
static PASCAL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "{", block_end: "}", nested_blocks: false });
13
/// Lexer for Pascal source text.
///
/// Holds a borrowed [`PascalLanguage`] configuration for the lifetime `'config`.
#[derive(Clone, Debug)]
pub struct PascalLexer<'config> {
    /// Borrowed language configuration. The underscore prefix marks it as currently unused.
    _config: &'config PascalLanguage,
}
18
19impl<'config> PascalLexer<'config> {
    /// Creates a lexer that borrows `config` for as long as the lexer lives.
    pub fn new(config: &'config PascalLanguage) -> Self {
        Self { _config: config }
    }
23
    /// Delegates whitespace scanning to `PASCAL_WHITESPACE`, tagging any token it produces as
    /// `PascalSyntaxKind::Whitespace`, and returns that scanner's result unchanged.
    ///
    /// NOTE(review): presumably `true` means whitespace was consumed — confirm against
    /// `WhitespaceConfig::scan`.
    fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
        PASCAL_WHITESPACE.scan(state, PascalSyntaxKind::Whitespace)
    }
27
28 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
29 let start = state.get_position();
30
31 if state.rest().starts_with("//") {
33 return PASCAL_COMMENT.scan(state, PascalSyntaxKind::Comment, PascalSyntaxKind::Comment);
34 }
35
36 if state.current() == Some('{') {
38 state.advance(1);
39 while let Some(ch) = state.peek() {
40 if ch == '}' {
41 state.advance(1);
42 break;
43 }
44 state.advance(ch.len_utf8());
45 }
46 state.add_token(PascalSyntaxKind::Comment, start, state.get_position());
47 return true;
48 }
49
50 if state.rest().starts_with("(*") {
52 state.advance(2);
53 while let Some(ch) = state.peek() {
54 if ch == '*' && state.peek_next_n(1) == Some(')') {
55 state.advance(2);
56 break;
57 }
58 state.advance(ch.len_utf8());
59 }
60 state.add_token(PascalSyntaxKind::Comment, start, state.get_position());
61 return true;
62 }
63
64 false
65 }
66
67 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
68 let start = state.get_position();
69
70 if state.current() == Some('\'') {
72 state.advance(1);
73 while let Some(ch) = state.peek() {
74 if ch == '\'' {
75 if state.peek_next_n(1) == Some('\'') {
77 state.advance(2); continue;
79 }
80 else {
81 state.advance(1); break;
83 }
84 }
85 state.advance(ch.len_utf8());
86 }
87 state.add_token(PascalSyntaxKind::StringLiteral, start, state.get_position());
88 return true;
89 }
90 false
91 }
92
93 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
94 if let Some(ch) = state.peek() {
95 if ch.is_alphabetic() || ch == '_' {
96 let start_pos = state.get_position();
97 let mut text = String::new();
98
99 while let Some(ch) = state.peek() {
101 if ch.is_alphanumeric() || ch == '_' {
102 text.push(ch);
103 state.advance(ch.len_utf8());
104 }
105 else {
106 break;
107 }
108 }
109
110 let kind = match text.to_lowercase().as_str() {
112 "program" => PascalSyntaxKind::Program,
113 "var" => PascalSyntaxKind::Var,
114 "const" => PascalSyntaxKind::Const,
115 "type" => PascalSyntaxKind::Type,
116 "procedure" => PascalSyntaxKind::Procedure,
117 "function" => PascalSyntaxKind::Function,
118 "begin" => PascalSyntaxKind::Begin,
119 "end" => PascalSyntaxKind::End,
120 "if" => PascalSyntaxKind::If,
121 "then" => PascalSyntaxKind::Then,
122 "else" => PascalSyntaxKind::Else,
123 "while" => PascalSyntaxKind::While,
124 "do" => PascalSyntaxKind::Do,
125 "for" => PascalSyntaxKind::For,
126 "to" => PascalSyntaxKind::To,
127 "downto" => PascalSyntaxKind::Downto,
128 "repeat" => PascalSyntaxKind::Repeat,
129 "until" => PascalSyntaxKind::Until,
130 "case" => PascalSyntaxKind::Case,
131 "of" => PascalSyntaxKind::Of,
132 "with" => PascalSyntaxKind::With,
133 "record" => PascalSyntaxKind::Record,
134 "array" => PascalSyntaxKind::Array,
135 "set" => PascalSyntaxKind::Set,
136 "file" => PascalSyntaxKind::File,
137 "packed" => PascalSyntaxKind::Packed,
138 "nil" => PascalSyntaxKind::Nil,
139 "true" => PascalSyntaxKind::True,
140 "false" => PascalSyntaxKind::False,
141 "and" => PascalSyntaxKind::And,
142 "or" => PascalSyntaxKind::Or,
143 "not" => PascalSyntaxKind::Not,
144 "div" => PascalSyntaxKind::Div,
145 "mod" => PascalSyntaxKind::Mod,
146 "in" => PascalSyntaxKind::In,
147
148 _ => PascalSyntaxKind::Identifier,
149 };
150
151 state.add_token(kind, start_pos, state.get_position());
152 true
153 }
154 else {
155 false
156 }
157 }
158 else {
159 false
160 }
161 }
162
163 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
164 if let Some(ch) = state.peek() {
165 if ch.is_ascii_digit() {
166 let start_pos = state.get_position();
167 let mut has_dot = false;
168
169 while let Some(ch) = state.peek() {
171 if ch.is_ascii_digit() {
172 state.advance(1);
173 }
174 else if ch == '.' && !has_dot {
175 has_dot = true;
176 state.advance(1);
177 }
178 else {
179 break;
180 }
181 }
182
183 let kind = if has_dot { PascalSyntaxKind::RealLiteral } else { PascalSyntaxKind::IntegerLiteral };
184
185 state.add_token(kind, start_pos, state.get_position());
186 true
187 }
188 else {
189 false
190 }
191 }
192 else {
193 false
194 }
195 }
196
197 fn lex_operators_and_punctuation<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
198 if let Some(ch) = state.peek() {
199 let start_pos = state.get_position();
200
201 let kind = match ch {
202 '+' => {
203 state.advance(1);
204 PascalSyntaxKind::Plus
205 }
206 '-' => {
207 state.advance(1);
208 PascalSyntaxKind::Minus
209 }
210 '*' => {
211 state.advance(1);
212 PascalSyntaxKind::Multiply
213 }
214 '/' => {
215 state.advance(1);
216 PascalSyntaxKind::Divide
217 }
218 '=' => {
219 state.advance(1);
220 PascalSyntaxKind::Equal
221 }
222 '<' => {
223 state.advance(1);
224 if let Some('=') = state.peek() {
225 state.advance(1);
226 PascalSyntaxKind::LessEqual
227 }
228 else if let Some('>') = state.peek() {
229 state.advance(1);
230 PascalSyntaxKind::NotEqual
231 }
232 else {
233 PascalSyntaxKind::Less
234 }
235 }
236 '>' => {
237 state.advance(1);
238 if let Some('=') = state.peek() {
239 state.advance(1);
240 PascalSyntaxKind::GreaterEqual
241 }
242 else {
243 PascalSyntaxKind::Greater
244 }
245 }
246 ':' => {
247 state.advance(1);
248 if let Some('=') = state.peek() {
249 state.advance(1);
250 PascalSyntaxKind::Assign
251 }
252 else {
253 PascalSyntaxKind::Colon
254 }
255 }
256 ';' => {
257 state.advance(1);
258 PascalSyntaxKind::Semicolon
259 }
260 ',' => {
261 state.advance(1);
262 PascalSyntaxKind::Comma
263 }
264 '.' => {
265 state.advance(1);
266 if let Some('.') = state.peek() {
267 state.advance(1);
268 PascalSyntaxKind::Range
269 }
270 else {
271 PascalSyntaxKind::Dot
272 }
273 }
274 '(' => {
275 state.advance(1);
276 PascalSyntaxKind::LeftParen
277 }
278 ')' => {
279 state.advance(1);
280 PascalSyntaxKind::RightParen
281 }
282 '[' => {
283 state.advance(1);
284 PascalSyntaxKind::LeftBracket
285 }
286 ']' => {
287 state.advance(1);
288 PascalSyntaxKind::RightBracket
289 }
290 '^' => {
291 state.advance(1);
292 PascalSyntaxKind::Caret
293 }
294 '\n' => {
295 state.advance(1);
296 PascalSyntaxKind::Newline
297 }
298 _ => {
299 state.advance(ch.len_utf8());
300 PascalSyntaxKind::Error
301 }
302 };
303
304 state.add_token(kind, start_pos, state.get_position());
305 true
306 }
307 else {
308 false
309 }
310 }
311}
312
313impl Lexer<PascalLanguage> for PascalLexer<'_> {
314 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PascalLanguage>) -> LexOutput<PascalLanguage> {
315 let mut state = State::new(source);
316 let result = self.run(&mut state);
317 if result.is_ok() {
318 state.add_eof();
319 }
320 state.finish_with_cache(result, cache)
321 }
322}
323
324impl PascalLexer<'_> {
325 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
326 let safe_point = state.get_position();
327 while state.not_at_end() {
328 if self.skip_whitespace(state) {
330 continue;
331 }
332
333 if self.skip_comment(state) {
335 continue;
336 }
337
338 if self.lex_string(state) {
340 continue;
341 }
342
343 if self.lex_identifier_or_keyword(state) {
345 continue;
346 }
347
348 if self.lex_number(state) {
350 continue;
351 }
352
353 if self.lex_operators_and_punctuation(state) {
355 continue;
356 }
357
358 let start_pos = state.get_position();
360 if let Some(ch) = state.peek() {
361 state.advance(ch.len_utf8());
362 state.add_token(PascalSyntaxKind::Error, start_pos, state.get_position());
363 }
364
365 state.advance_if_dead_lock(safe_point);
366 }
367
368 Ok(())
370 }
371}