1use crate::{kind::VampireSyntaxKind, language::VampireLanguage};
2use oak_core::{
3 IncrementalCache, Lexer, LexerState, OakError,
4 lexer::{CommentLine, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<S> = LexerState<S, VampireLanguage>;
10
11static VAMPIRE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static VAMPIRE_COMMENT: LazyLock<CommentLine> = LazyLock::new(|| CommentLine { line_markers: &["%"] });
13static VAMPIRE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct VampireLexer<'config> {
17 config: &'config VampireLanguage,
18}
19
20impl<'config> Lexer<VampireLanguage> for VampireLexer<'config> {
21 fn lex_incremental(
22 &self,
23 source: impl Source,
24 changed: usize,
25 cache: IncrementalCache<VampireLanguage>,
26 ) -> LexOutput<VampireLanguage> {
27 let mut state = LexerState::new_with_cache(source, changed, cache);
28 let result = self.run(&mut state);
29 state.finish(result)
30 }
31}
32
33impl<'config> VampireLexer<'config> {
34 pub fn new(config: &'config VampireLanguage) -> Self {
35 Self { config }
36 }
37
38 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
40 while state.not_at_end() {
41 let safe_point = state.get_position();
42
43 if self.skip_whitespace(state) {
44 continue;
45 }
46
47 if self.skip_comment(state) {
48 continue;
49 }
50
51 if self.lex_string_literal(state) {
52 continue;
53 }
54
55 if self.lex_number_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_operators(state) {
64 continue;
65 }
66
67 if self.lex_single_char_tokens(state) {
68 continue;
69 }
70
71 state.safe_check(safe_point);
72 }
73
74 let eof_pos = state.get_position();
76 state.add_token(VampireSyntaxKind::Eof, eof_pos, eof_pos);
77 Ok(())
78 }
79
80 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
82 match VAMPIRE_WHITESPACE.scan(state.rest(), state.get_position(), VampireSyntaxKind::Whitespace) {
83 Some(token) => {
84 state.advance_with(token);
85 true
86 }
87 None => false,
88 }
89 }
90
91 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
93 let start = state.get_position();
94 let rest = state.rest();
95
96 if rest.starts_with("%") {
98 state.advance(1);
99 while let Some(ch) = state.peek() {
100 if ch == '\n' || ch == '\r' {
101 break;
102 }
103 state.advance(ch.len_utf8());
104 }
105 state.add_token(VampireSyntaxKind::LineComment, start, state.get_position());
106 return true;
107 }
108
109 if rest.starts_with("/*") {
111 state.advance(2);
112 while let Some(ch) = state.peek() {
113 if ch == '*' && state.peek_next_n(1) == Some('/') {
114 state.advance(2);
115 break;
116 }
117 state.advance(ch.len_utf8());
118 }
119 state.add_token(VampireSyntaxKind::BlockComment, start, state.get_position());
120 return true;
121 }
122
123 false
124 }
125
126 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
128 let start = state.get_position();
129
130 if state.current() == Some('"') {
131 state.advance(1);
132 let mut escaped = false;
133 while let Some(ch) = state.peek() {
134 if ch == '"' && !escaped {
135 state.advance(1); break;
137 }
138 state.advance(ch.len_utf8());
139 if escaped {
140 escaped = false;
141 continue;
142 }
143 if ch == '\\' {
144 escaped = true;
145 continue;
146 }
147 if ch == '\n' || ch == '\r' {
148 break;
149 }
150 }
151 state.add_token(VampireSyntaxKind::StringLiteral, start, state.get_position());
152 return true;
153 }
154 false
155 }
156
157 fn lex_number_literal<S: Source>(&self, state: &mut State<S>) -> bool {
159 let start = state.get_position();
160 let first = match state.current() {
161 Some(c) => c,
162 None => return false,
163 };
164
165 if !first.is_ascii_digit() && first != '-' && first != '+' {
166 return false;
167 }
168
169 let mut is_real = false;
170
171 if first == '-' || first == '+' {
173 state.advance(1);
174 if !state.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
175 state.set_position(start);
176 return false;
177 }
178 }
179
180 while let Some(c) = state.peek() {
182 if c.is_ascii_digit() {
183 state.advance(1);
184 }
185 else {
186 break;
187 }
188 }
189
190 if state.peek() == Some('.') {
192 let n1 = state.peek_next_n(1);
193 if n1.map(|c| c.is_ascii_digit()).unwrap_or(false) {
194 is_real = true;
195 state.advance(1); while let Some(c) = state.peek() {
197 if c.is_ascii_digit() {
198 state.advance(1);
199 }
200 else {
201 break;
202 }
203 }
204 }
205 }
206
207 if let Some(c) = state.peek() {
209 if c == 'e' || c == 'E' {
210 let n1 = state.peek_next_n(1);
211 if n1 == Some('+') || n1 == Some('-') || n1.map(|d| d.is_ascii_digit()).unwrap_or(false) {
212 is_real = true;
213 state.advance(1);
214 if let Some(sign) = state.peek() {
215 if sign == '+' || sign == '-' {
216 state.advance(1);
217 }
218 }
219 while let Some(d) = state.peek() {
220 if d.is_ascii_digit() {
221 state.advance(1);
222 }
223 else {
224 break;
225 }
226 }
227 }
228 }
229 }
230
231 let end = state.get_position();
232 state.add_token(if is_real { VampireSyntaxKind::RealLiteral } else { VampireSyntaxKind::IntegerLiteral }, start, end);
233 true
234 }
235
236 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
238 let start = state.get_position();
239 let ch = match state.current() {
240 Some(c) => c,
241 None => return false,
242 };
243
244 if !(ch.is_ascii_alphabetic() || ch == '_' || ch == '$') {
245 return false;
246 }
247
248 state.advance(1);
249 while let Some(c) = state.current() {
250 if c.is_ascii_alphanumeric() || c == '_' || c == '$' {
251 state.advance(1);
252 }
253 else {
254 break;
255 }
256 }
257
258 let end = state.get_position();
259 let text = state.get_text_in((start..end).into());
260 let kind = match text {
261 "fof" => VampireSyntaxKind::FofKw,
263 "cnf" => VampireSyntaxKind::CnfKw,
264 "tff" => VampireSyntaxKind::TffKw,
265 "thf" => VampireSyntaxKind::ThfKw,
266 "tpi" => VampireSyntaxKind::TpiKw,
267 "include" => VampireSyntaxKind::IncludeKw,
268
269 "axiom" => VampireSyntaxKind::AxiomKw,
271 "hypothesis" => VampireSyntaxKind::HypothesisKw,
272 "definition" => VampireSyntaxKind::DefinitionKw,
273 "assumption" => VampireSyntaxKind::AssumptionKw,
274 "lemma" => VampireSyntaxKind::LemmaKw,
275 "theorem" => VampireSyntaxKind::TheoremKw,
276 "conjecture" => VampireSyntaxKind::ConjectureKw,
277 "negated_conjecture" => VampireSyntaxKind::NegatedConjectureKw,
278 "plain" => VampireSyntaxKind::PlainKw,
279 "type" => VampireSyntaxKind::TypeKw,
280 "fi_domain" => VampireSyntaxKind::FiDomainKw,
281 "fi_functors" => VampireSyntaxKind::FiFunctorsKw,
282 "fi_predicates" => VampireSyntaxKind::FiPredicatesKw,
283 "unknown" => VampireSyntaxKind::UnknownKw,
284
285 "!" => VampireSyntaxKind::ForallKw,
287 "?" => VampireSyntaxKind::ExistsKw,
288 "&" => VampireSyntaxKind::AndKw,
289 "|" => VampireSyntaxKind::OrKw,
290 "~" => VampireSyntaxKind::NotKw,
291 "=>" => VampireSyntaxKind::ImpliesKw,
292 "<=>" => VampireSyntaxKind::IffKw,
293 "<~>" => VampireSyntaxKind::XorKw,
294 "~|" => VampireSyntaxKind::NorKw,
295 "~&" => VampireSyntaxKind::NandKw,
296
297 "$o" => VampireSyntaxKind::BoolKw,
299 "$i" => VampireSyntaxKind::IndividualKw,
300 "$int" => VampireSyntaxKind::IntKw,
301 "$real" => VampireSyntaxKind::RealKw,
302 "$rat" => VampireSyntaxKind::RatKw,
303 "$tType" => VampireSyntaxKind::TTypeKw,
304 "$oType" => VampireSyntaxKind::OTypeKw,
305 "$iType" => VampireSyntaxKind::ITypeKw,
306
307 "$true" => VampireSyntaxKind::BoolLiteral,
309 "$false" => VampireSyntaxKind::BoolLiteral,
310
311 _ => VampireSyntaxKind::Identifier,
312 };
313
314 state.add_token(kind, start, state.get_position());
315 true
316 }
317
318 fn lex_operators<S: Source>(&self, state: &mut State<S>) -> bool {
320 let start = state.get_position();
321 let rest = state.rest();
322
323 let patterns: &[(&str, VampireSyntaxKind)] = &[
325 ("<==>", VampireSyntaxKind::IffKw),
326 ("<~>", VampireSyntaxKind::XorKw),
327 ("==>", VampireSyntaxKind::ImpliesKw),
328 ("~|", VampireSyntaxKind::NorKw),
329 ("~&", VampireSyntaxKind::NandKw),
330 ("==", VampireSyntaxKind::EqEq),
331 ("!=", VampireSyntaxKind::NotEq),
332 ("<=", VampireSyntaxKind::LessEq),
333 (">=", VampireSyntaxKind::GreaterEq),
334 ("&&", VampireSyntaxKind::AndAnd),
335 ("||", VampireSyntaxKind::OrOr),
336 ("++", VampireSyntaxKind::PlusPlus),
337 ("--", VampireSyntaxKind::MinusMinus),
338 ("+=", VampireSyntaxKind::PlusEq),
339 ("-=", VampireSyntaxKind::MinusEq),
340 ("*=", VampireSyntaxKind::StarEq),
341 ("/=", VampireSyntaxKind::SlashEq),
342 ("%=", VampireSyntaxKind::PercentEq),
343 ("<<", VampireSyntaxKind::LeftShift),
344 (">>", VampireSyntaxKind::RightShift),
345 ("->", VampireSyntaxKind::Arrow),
346 ];
347
348 for (pat, kind) in patterns {
349 if rest.starts_with(pat) {
350 state.advance(pat.len());
351 state.add_token(*kind, start, state.get_position());
352 return true;
353 }
354 }
355
356 if let Some(ch) = state.current() {
358 let kind = match ch {
359 '=' => Some(VampireSyntaxKind::Eq),
360 '<' => Some(VampireSyntaxKind::LessThan),
361 '>' => Some(VampireSyntaxKind::GreaterThan),
362 '+' => Some(VampireSyntaxKind::Plus),
363 '-' => Some(VampireSyntaxKind::Minus),
364 '*' => Some(VampireSyntaxKind::Star),
365 '/' => Some(VampireSyntaxKind::Slash),
366 '\\' => Some(VampireSyntaxKind::Backslash),
367 '%' => Some(VampireSyntaxKind::Percent),
368 '^' => Some(VampireSyntaxKind::Caret),
369 '&' => Some(VampireSyntaxKind::Ampersand),
370 '|' => Some(VampireSyntaxKind::Pipe),
371 '~' => Some(VampireSyntaxKind::Tilde),
372 '!' => Some(VampireSyntaxKind::Bang),
373 '?' => Some(VampireSyntaxKind::Question),
374 '.' => Some(VampireSyntaxKind::Dot),
375 ':' => Some(VampireSyntaxKind::Colon),
376 _ => None,
377 };
378
379 if let Some(k) = kind {
380 state.advance(ch.len_utf8());
381 state.add_token(k, start, state.get_position());
382 return true;
383 }
384 }
385
386 false
387 }
388
389 fn lex_single_char_tokens<S: Source>(&self, state: &mut State<S>) -> bool {
391 let start = state.get_position();
392 if let Some(ch) = state.current() {
393 let kind = match ch {
394 '(' => VampireSyntaxKind::LeftParen,
395 ')' => VampireSyntaxKind::RightParen,
396 '[' => VampireSyntaxKind::LeftBracket,
397 ']' => VampireSyntaxKind::RightBracket,
398 '{' => VampireSyntaxKind::LeftBrace,
399 '}' => VampireSyntaxKind::RightBrace,
400 ',' => VampireSyntaxKind::Comma,
401 ';' => VampireSyntaxKind::Semicolon,
402 '@' => VampireSyntaxKind::At,
403 '#' => VampireSyntaxKind::Hash,
404 '$' => VampireSyntaxKind::Dollar,
405 _ => {
406 state.advance(ch.len_utf8());
408 state.add_token(VampireSyntaxKind::Error, start, state.get_position());
409 return true;
410 }
411 };
412
413 state.advance(ch.len_utf8());
414 state.add_token(kind, start, state.get_position());
415 true
416 }
417 else {
418 false
419 }
420 }
421}