1use crate::{kind::VampireSyntaxKind, language::VampireLanguage};
2use oak_core::{
3 Lexer, LexerCache, LexerState, OakError, TextEdit,
4 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
5 source::Source,
6};
7use std::sync::LazyLock;
8
9type State<'s, S> = LexerState<'s, S, VampireLanguage>;
10
11static VAMPIRE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
12static VAMPIRE_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "%", block_start: "/*", block_end: "*/", nested_blocks: false });
13static VAMPIRE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
14
15#[derive(Clone)]
16pub struct VampireLexer<'config> {
17 _config: &'config VampireLanguage,
18}
19
20impl<'config> Lexer<VampireLanguage> for VampireLexer<'config> {
21 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], _cache: &'a mut impl LexerCache<VampireLanguage>) -> LexOutput<VampireLanguage> {
22 let mut state: State<'_, S> = LexerState::new(text);
23 let result = self.run(&mut state);
24 if result.is_ok() {
25 state.add_eof();
26 }
27 state.finish(result)
28 }
29}
30
31impl<'config> VampireLexer<'config> {
32 pub fn new(config: &'config VampireLanguage) -> Self {
33 Self { _config: config }
34 }
35
36 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
38 while state.not_at_end() {
39 let safe_point = state.get_position();
40
41 if self.skip_whitespace(state) {
42 continue;
43 }
44
45 if self.skip_comment(state) {
46 continue;
47 }
48
49 if self.lex_string_literal(state) {
50 continue;
51 }
52
53 if self.lex_number_literal(state) {
54 continue;
55 }
56
57 if self.lex_identifier_or_keyword(state) {
58 continue;
59 }
60
61 if self.lex_operators(state) {
62 continue;
63 }
64
65 if self.lex_single_char_tokens(state) {
66 continue;
67 }
68
69 state.advance_if_dead_lock(safe_point);
70 }
71
72 Ok(())
73 }
74
75 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
77 VAMPIRE_WHITESPACE.scan(state, VampireSyntaxKind::Whitespace)
78 }
79
80 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
82 VAMPIRE_COMMENT.scan(state, VampireSyntaxKind::LineComment, VampireSyntaxKind::BlockComment)
83 }
84
85 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
87 VAMPIRE_STRING.scan(state, VampireSyntaxKind::StringLiteral)
88 }
89
90 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
92 let start = state.get_position();
93 let mut has_digit = false;
94
95 if state.peek() == Some('-') {
97 state.advance(1);
98 }
99
100 while let Some(ch) = state.peek() {
101 if ch.is_ascii_digit() {
102 state.advance(ch.len_utf8());
103 has_digit = true;
104 }
105 else {
106 break;
107 }
108 }
109
110 if !has_digit {
111 return false;
112 }
113
114 let mut is_float = false;
116 if state.peek() == Some('.') {
117 if let Some(next) = state.peek_next_n(1) {
118 if next.is_ascii_digit() {
119 state.advance(1); is_float = true;
121 while let Some(ch) = state.peek() {
122 if ch.is_ascii_digit() {
123 state.advance(ch.len_utf8());
124 }
125 else {
126 break;
127 }
128 }
129 }
130 }
131 }
132
133 if let Some(ch) = state.peek() {
134 if ch == 'e' || ch == 'E' {
135 state.advance(1);
136 is_float = true;
137 if let Some(ch) = state.peek() {
138 if ch == '+' || ch == '-' {
139 state.advance(1);
140 }
141 }
142 while let Some(ch) = state.peek() {
143 if ch.is_ascii_digit() {
144 state.advance(ch.len_utf8());
145 }
146 else {
147 break;
148 }
149 }
150 }
151 }
152
153 let kind = if is_float { VampireSyntaxKind::RealLiteral } else { VampireSyntaxKind::IntegerLiteral };
154 state.add_token(kind, start, state.get_position());
155 true
156 }
157
158 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
160 let start = state.get_position();
161 if let Some(ch) = state.peek() {
162 if ch.is_ascii_alphabetic() || ch == '_' || ch == '$' {
163 state.advance(ch.len_utf8());
164 while let Some(ch) = state.peek() {
165 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' {
166 state.advance(ch.len_utf8());
167 }
168 else {
169 break;
170 }
171 }
172
173 let end = state.get_position();
174 let text = state.get_text_in((start..end).into());
175 let kind = self.keyword_or_identifier(text.as_ref());
176 state.add_token(kind, start, end);
177 true
178 }
179 else {
180 false
181 }
182 }
183 else {
184 false
185 }
186 }
187
188 fn keyword_or_identifier(&self, text: &str) -> VampireSyntaxKind {
190 match text {
191 "fof" => VampireSyntaxKind::FofKw,
193 "cnf" => VampireSyntaxKind::CnfKw,
194 "tff" => VampireSyntaxKind::TffKw,
195 "thf" => VampireSyntaxKind::ThfKw,
196 "tpi" => VampireSyntaxKind::TpiKw,
197 "include" => VampireSyntaxKind::IncludeKw,
198
199 "axiom" => VampireSyntaxKind::AxiomKw,
201 "hypothesis" => VampireSyntaxKind::HypothesisKw,
202 "definition" => VampireSyntaxKind::DefinitionKw,
203 "assumption" => VampireSyntaxKind::AssumptionKw,
204 "lemma" => VampireSyntaxKind::LemmaKw,
205 "theorem" => VampireSyntaxKind::TheoremKw,
206 "conjecture" => VampireSyntaxKind::ConjectureKw,
207 "negated_conjecture" => VampireSyntaxKind::NegatedConjectureKw,
208 "plain" => VampireSyntaxKind::PlainKw,
209 "type" => VampireSyntaxKind::TypeKw,
210 "fi_domain" => VampireSyntaxKind::FiDomainKw,
211 "fi_functors" => VampireSyntaxKind::FiFunctorsKw,
212 "fi_predicates" => VampireSyntaxKind::FiPredicatesKw,
213 "unknown" => VampireSyntaxKind::UnknownKw,
214
215 "!" => VampireSyntaxKind::ForallKw,
217 "?" => VampireSyntaxKind::ExistsKw,
218 "&" => VampireSyntaxKind::AndKw,
219 "|" => VampireSyntaxKind::OrKw,
220 "~" => VampireSyntaxKind::NotKw,
221 "=>" => VampireSyntaxKind::ImpliesKw,
222 "<=>" => VampireSyntaxKind::IffKw,
223 "<~>" => VampireSyntaxKind::XorKw,
224 "~|" => VampireSyntaxKind::NorKw,
225 "~&" => VampireSyntaxKind::NandKw,
226
227 "$o" => VampireSyntaxKind::BoolKw,
229 "$i" => VampireSyntaxKind::IndividualKw,
230 "$int" => VampireSyntaxKind::IntKw,
231 "$real" => VampireSyntaxKind::RealKw,
232 "$rat" => VampireSyntaxKind::RatKw,
233 "$tType" => VampireSyntaxKind::TTypeKw,
234 "$oType" => VampireSyntaxKind::OTypeKw,
235 "$iType" => VampireSyntaxKind::ITypeKw,
236
237 "$true" => VampireSyntaxKind::BoolLiteral,
239 "$false" => VampireSyntaxKind::BoolLiteral,
240
241 _ => VampireSyntaxKind::Identifier,
242 }
243 }
244
245 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
247 let start = state.get_position();
248 let rest = state.rest();
249
250 let (kind, len) = if rest.starts_with("==>") {
251 (VampireSyntaxKind::ImpliesKw, 3)
252 }
253 else if rest.starts_with("<=>") {
254 (VampireSyntaxKind::IffKw, 3)
255 }
256 else if rest.starts_with("<~>") {
257 (VampireSyntaxKind::XorKw, 3)
258 }
259 else if rest.starts_with("~|") {
260 (VampireSyntaxKind::NorKw, 2)
261 }
262 else if rest.starts_with("~&") {
263 (VampireSyntaxKind::NandKw, 2)
264 }
265 else if rest.starts_with("==") {
266 (VampireSyntaxKind::EqEq, 2)
267 }
268 else if rest.starts_with("!=") {
269 (VampireSyntaxKind::NotEq, 2)
270 }
271 else if rest.starts_with("<=") {
272 (VampireSyntaxKind::LessEq, 2)
273 }
274 else if rest.starts_with(">=") {
275 (VampireSyntaxKind::GreaterEq, 2)
276 }
277 else if rest.starts_with("&&") {
278 (VampireSyntaxKind::AndAnd, 2)
279 }
280 else if rest.starts_with("||") {
281 (VampireSyntaxKind::OrOr, 2)
282 }
283 else if rest.starts_with("++") {
284 (VampireSyntaxKind::PlusPlus, 2)
285 }
286 else if rest.starts_with("--") {
287 (VampireSyntaxKind::MinusMinus, 2)
288 }
289 else if rest.starts_with("+=") {
290 (VampireSyntaxKind::PlusEq, 2)
291 }
292 else if rest.starts_with("-=") {
293 (VampireSyntaxKind::MinusEq, 2)
294 }
295 else if rest.starts_with("*=") {
296 (VampireSyntaxKind::StarEq, 2)
297 }
298 else if rest.starts_with("/=") {
299 (VampireSyntaxKind::SlashEq, 2)
300 }
301 else if rest.starts_with("%=") {
302 (VampireSyntaxKind::PercentEq, 2)
303 }
304 else if rest.starts_with("<<") {
305 (VampireSyntaxKind::LeftShift, 2)
306 }
307 else if rest.starts_with(">>") {
308 (VampireSyntaxKind::RightShift, 2)
309 }
310 else if rest.starts_with("->") {
311 (VampireSyntaxKind::Arrow, 2)
312 }
313 else {
314 return false;
315 };
316
317 state.advance(len);
318 state.add_token(kind, start, state.get_position());
319 true
320 }
321
322 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
324 if let Some(ch) = state.peek() {
325 let start = state.get_position();
326 let kind = match ch {
327 '(' => Some(VampireSyntaxKind::LeftParen),
328 ')' => Some(VampireSyntaxKind::RightParen),
329 '[' => Some(VampireSyntaxKind::LeftBracket),
330 ']' => Some(VampireSyntaxKind::RightBracket),
331 '{' => Some(VampireSyntaxKind::LeftBrace),
332 '}' => Some(VampireSyntaxKind::RightBrace),
333 ':' => Some(VampireSyntaxKind::Colon),
334 ';' => Some(VampireSyntaxKind::Semicolon),
335 '.' => Some(VampireSyntaxKind::Dot),
336 ',' => Some(VampireSyntaxKind::Comma),
337 '?' => Some(VampireSyntaxKind::Question),
338 '!' => Some(VampireSyntaxKind::Bang),
339 '@' => Some(VampireSyntaxKind::At),
340 '#' => Some(VampireSyntaxKind::Hash),
341 '$' => Some(VampireSyntaxKind::Dollar),
342 '%' => Some(VampireSyntaxKind::Percent),
343 '^' => Some(VampireSyntaxKind::Caret),
344 '&' => Some(VampireSyntaxKind::Ampersand),
345 '*' => Some(VampireSyntaxKind::Star),
346 '+' => Some(VampireSyntaxKind::Plus),
347 '-' => Some(VampireSyntaxKind::Minus),
348 '=' => Some(VampireSyntaxKind::Eq),
349 '<' => Some(VampireSyntaxKind::LessThan),
350 '>' => Some(VampireSyntaxKind::GreaterThan),
351 '/' => Some(VampireSyntaxKind::Slash),
352 '\\' => Some(VampireSyntaxKind::Backslash),
353 '|' => Some(VampireSyntaxKind::Pipe),
354 '~' => Some(VampireSyntaxKind::Tilde),
355 _ => None,
356 };
357
358 if let Some(token_kind) = kind {
359 state.advance(ch.len_utf8());
360 state.add_token(token_kind, start, state.get_position());
361 true
362 }
363 else {
364 false
365 }
366 }
367 else {
368 false
369 }
370 }
371}