1use crate::{kind::PascalSyntaxKind, language::PascalLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError,
4 lexer::{CommentConfig, LexOutput, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'s, S> = LexerState<'s, S, PascalLanguage>;
10
11static PASCAL_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static PASCAL_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "//", block_start: "{", block_end: "}", nested_blocks: false });
13
/// Lexer that turns Pascal source text into a stream of `PascalSyntaxKind` tokens.
///
/// The lexer is a stateless unit struct; all scanning state lives in the
/// `LexerState` passed to its methods.
#[derive(Clone, Debug, Default)]
pub struct PascalLexer;
16
17impl PascalLexer {
18 pub fn new(_config: &PascalLanguage) -> Self {
19 Self
20 }
21
22 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
23 PASCAL_WHITESPACE.scan(state, PascalSyntaxKind::Whitespace)
24 }
25
26 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
27 let start = state.get_position();
28
29 if state.rest().starts_with("//") {
31 return PASCAL_COMMENT.scan(state, PascalSyntaxKind::Comment, PascalSyntaxKind::Comment);
32 }
33
34 if state.current() == Some('{') {
36 state.advance(1);
37 while let Some(ch) = state.peek() {
38 if ch == '}' {
39 state.advance(1);
40 break;
41 }
42 state.advance(ch.len_utf8());
43 }
44 state.add_token(PascalSyntaxKind::Comment, start, state.get_position());
45 return true;
46 }
47
48 if state.rest().starts_with("(*") {
50 state.advance(2);
51 while let Some(ch) = state.peek() {
52 if ch == '*' && state.peek_next_n(1) == Some(')') {
53 state.advance(2);
54 break;
55 }
56 state.advance(ch.len_utf8());
57 }
58 state.add_token(PascalSyntaxKind::Comment, start, state.get_position());
59 return true;
60 }
61
62 false
63 }
64
65 fn lex_string<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
66 let start = state.get_position();
67
68 if state.current() == Some('\'') {
70 state.advance(1);
71 while let Some(ch) = state.peek() {
72 if ch == '\'' {
73 if state.peek_next_n(1) == Some('\'') {
75 state.advance(2); continue;
77 }
78 else {
79 state.advance(1); break;
81 }
82 }
83 state.advance(ch.len_utf8());
84 }
85 state.add_token(PascalSyntaxKind::StringLiteral, start, state.get_position());
86 return true;
87 }
88 false
89 }
90
91 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
92 if let Some(ch) = state.peek() {
93 if ch.is_alphabetic() || ch == '_' {
94 let start_pos = state.get_position();
95 let mut text = String::new();
96
97 while let Some(ch) = state.peek() {
99 if ch.is_alphanumeric() || ch == '_' {
100 text.push(ch);
101 state.advance(ch.len_utf8());
102 }
103 else {
104 break;
105 }
106 }
107
108 let kind = match text.to_lowercase().as_str() {
110 "program" => PascalSyntaxKind::Program,
111 "var" => PascalSyntaxKind::Var,
112 "const" => PascalSyntaxKind::Const,
113 "type" => PascalSyntaxKind::Type,
114 "procedure" => PascalSyntaxKind::Procedure,
115 "function" => PascalSyntaxKind::Function,
116 "begin" => PascalSyntaxKind::Begin,
117 "end" => PascalSyntaxKind::End,
118 "if" => PascalSyntaxKind::If,
119 "then" => PascalSyntaxKind::Then,
120 "else" => PascalSyntaxKind::Else,
121 "while" => PascalSyntaxKind::While,
122 "do" => PascalSyntaxKind::Do,
123 "for" => PascalSyntaxKind::For,
124 "to" => PascalSyntaxKind::To,
125 "downto" => PascalSyntaxKind::Downto,
126 "repeat" => PascalSyntaxKind::Repeat,
127 "until" => PascalSyntaxKind::Until,
128 "case" => PascalSyntaxKind::Case,
129 "of" => PascalSyntaxKind::Of,
130 "with" => PascalSyntaxKind::With,
131 "record" => PascalSyntaxKind::Record,
132 "array" => PascalSyntaxKind::Array,
133 "set" => PascalSyntaxKind::Set,
134 "file" => PascalSyntaxKind::File,
135 "packed" => PascalSyntaxKind::Packed,
136 "nil" => PascalSyntaxKind::Nil,
137 "true" => PascalSyntaxKind::True,
138 "false" => PascalSyntaxKind::False,
139 "and" => PascalSyntaxKind::And,
140 "or" => PascalSyntaxKind::Or,
141 "not" => PascalSyntaxKind::Not,
142 "div" => PascalSyntaxKind::Div,
143 "mod" => PascalSyntaxKind::Mod,
144 "in" => PascalSyntaxKind::In,
145
146 _ => PascalSyntaxKind::Identifier,
147 };
148
149 state.add_token(kind, start_pos, state.get_position());
150 true
151 }
152 else {
153 false
154 }
155 }
156 else {
157 false
158 }
159 }
160
161 fn lex_number<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
162 if let Some(ch) = state.peek() {
163 if ch.is_ascii_digit() {
164 let start_pos = state.get_position();
165 let mut has_dot = false;
166
167 while let Some(ch) = state.peek() {
169 if ch.is_ascii_digit() {
170 state.advance(1);
171 }
172 else if ch == '.' && !has_dot {
173 has_dot = true;
174 state.advance(1);
175 }
176 else {
177 break;
178 }
179 }
180
181 let kind = if has_dot { PascalSyntaxKind::RealLiteral } else { PascalSyntaxKind::IntegerLiteral };
182
183 state.add_token(kind, start_pos, state.get_position());
184 true
185 }
186 else {
187 false
188 }
189 }
190 else {
191 false
192 }
193 }
194
195 fn lex_operators_and_punctuation<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
196 if let Some(ch) = state.peek() {
197 let start_pos = state.get_position();
198
199 let kind = match ch {
200 '+' => {
201 state.advance(1);
202 PascalSyntaxKind::Plus
203 }
204 '-' => {
205 state.advance(1);
206 PascalSyntaxKind::Minus
207 }
208 '*' => {
209 state.advance(1);
210 PascalSyntaxKind::Multiply
211 }
212 '/' => {
213 state.advance(1);
214 PascalSyntaxKind::Divide
215 }
216 '=' => {
217 state.advance(1);
218 PascalSyntaxKind::Equal
219 }
220 '<' => {
221 state.advance(1);
222 if let Some('=') = state.peek() {
223 state.advance(1);
224 PascalSyntaxKind::LessEqual
225 }
226 else if let Some('>') = state.peek() {
227 state.advance(1);
228 PascalSyntaxKind::NotEqual
229 }
230 else {
231 PascalSyntaxKind::Less
232 }
233 }
234 '>' => {
235 state.advance(1);
236 if let Some('=') = state.peek() {
237 state.advance(1);
238 PascalSyntaxKind::GreaterEqual
239 }
240 else {
241 PascalSyntaxKind::Greater
242 }
243 }
244 ':' => {
245 state.advance(1);
246 if let Some('=') = state.peek() {
247 state.advance(1);
248 PascalSyntaxKind::Assign
249 }
250 else {
251 PascalSyntaxKind::Colon
252 }
253 }
254 ';' => {
255 state.advance(1);
256 PascalSyntaxKind::Semicolon
257 }
258 ',' => {
259 state.advance(1);
260 PascalSyntaxKind::Comma
261 }
262 '.' => {
263 state.advance(1);
264 if let Some('.') = state.peek() {
265 state.advance(1);
266 PascalSyntaxKind::Range
267 }
268 else {
269 PascalSyntaxKind::Dot
270 }
271 }
272 '(' => {
273 state.advance(1);
274 PascalSyntaxKind::LeftParen
275 }
276 ')' => {
277 state.advance(1);
278 PascalSyntaxKind::RightParen
279 }
280 '[' => {
281 state.advance(1);
282 PascalSyntaxKind::LeftBracket
283 }
284 ']' => {
285 state.advance(1);
286 PascalSyntaxKind::RightBracket
287 }
288 '^' => {
289 state.advance(1);
290 PascalSyntaxKind::Caret
291 }
292 '\n' => {
293 state.advance(1);
294 PascalSyntaxKind::Newline
295 }
296 _ => {
297 state.advance(ch.len_utf8());
298 PascalSyntaxKind::Error
299 }
300 };
301
302 state.add_token(kind, start_pos, state.get_position());
303 true
304 }
305 else {
306 false
307 }
308 }
309}
310
311impl Lexer<PascalLanguage> for PascalLexer {
312 fn lex<'a, S: Source + ?Sized>(&self, source: &'a S, _edits: &[oak_core::source::TextEdit], cache: &'a mut impl LexerCache<PascalLanguage>) -> LexOutput<PascalLanguage> {
313 let mut state = State::new(source);
314 let result = self.run(&mut state);
315 if result.is_ok() {
316 state.add_eof();
317 }
318 state.finish_with_cache(result, cache)
319 }
320}
321
322impl PascalLexer {
323 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
324 let safe_point = state.get_position();
325 while state.not_at_end() {
326 if self.skip_whitespace(state) {
328 continue;
329 }
330
331 if self.skip_comment(state) {
333 continue;
334 }
335
336 if self.lex_string(state) {
338 continue;
339 }
340
341 if self.lex_identifier_or_keyword(state) {
343 continue;
344 }
345
346 if self.lex_number(state) {
348 continue;
349 }
350
351 if self.lex_operators_and_punctuation(state) {
353 continue;
354 }
355
356 let start_pos = state.get_position();
358 if let Some(ch) = state.peek() {
359 state.advance(ch.len_utf8());
360 state.add_token(PascalSyntaxKind::Error, start_pos, state.get_position());
361 }
362
363 state.advance_if_dead_lock(safe_point);
364 }
365
366 Ok(())
368 }
369}