1#![doc = include_str!("readme.md")]
2pub mod token_type;
3pub use token_type::VampireTokenType;
4
5use crate::language::VampireLanguage;
6use oak_core::{
7 Lexer, LexerCache, LexerState, OakError, TextEdit,
8 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
9 source::Source,
10};
11use std::sync::LazyLock;
12
13type State<'s, S> = LexerState<'s, S, VampireLanguage>;
14
15static VAMPIRE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
16static VAMPIRE_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "%", block_start: "/*", block_end: "*/", nested_blocks: false });
17static VAMPIRE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
18
19#[derive(Clone)]
20pub struct VampireLexer<'config> {
21 config: &'config VampireLanguage,
22}
23
24impl<'config> Lexer<VampireLanguage> for VampireLexer<'config> {
25 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], _cache: &'a mut impl LexerCache<VampireLanguage>) -> LexOutput<VampireLanguage> {
26 let mut state: State<'_, S> = LexerState::new(text);
27 let result = self.run(&mut state);
28 if result.is_ok() {
29 state.add_eof();
30 }
31 state.finish(result)
32 }
33}
34impl<'config> VampireLexer<'config> {
35 pub fn new(config: &'config VampireLanguage) -> Self {
36 Self { config }
37 }
38
39 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
41 while state.not_at_end() {
42 let safe_point = state.get_position();
43
44 if self.skip_whitespace(state) {
45 continue;
46 }
47
48 if self.skip_comment(state) {
49 continue;
50 }
51
52 if self.lex_string_literal(state) {
53 continue;
54 }
55
56 if self.lex_number_literal(state) {
57 continue;
58 }
59
60 if self.lex_identifier_or_keyword(state) {
61 continue;
62 }
63
64 if self.lex_operators(state) {
65 continue;
66 }
67
68 if self.lex_single_char_tokens(state) {
69 continue;
70 }
71
72 state.advance_if_dead_lock(safe_point);
73 }
74
75 Ok(())
76 }
77
78 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
80 VAMPIRE_WHITESPACE.scan(state, VampireTokenType::Whitespace)
81 }
82
83 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
85 VAMPIRE_COMMENT.scan(state, VampireTokenType::LineComment, VampireTokenType::BlockComment)
86 }
87
88 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
90 VAMPIRE_STRING.scan(state, VampireTokenType::StringLiteral)
91 }
92
93 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
95 let start = state.get_position();
96 let mut has_digit = false;
97
98 if state.peek() == Some('-') {
100 state.advance(1);
101 }
102
103 while let Some(ch) = state.peek() {
104 if ch.is_ascii_digit() {
105 state.advance(ch.len_utf8());
106 has_digit = true;
107 }
108 else {
109 break;
110 }
111 }
112
113 if !has_digit {
114 return false;
115 }
116
117 let mut is_float = false;
119 if state.peek() == Some('.') {
120 if let Some(next) = state.peek_next_n(1) {
121 if next.is_ascii_digit() {
122 state.advance(1); is_float = true;
124 while let Some(ch) = state.peek() {
125 if ch.is_ascii_digit() {
126 state.advance(ch.len_utf8());
127 }
128 else {
129 break;
130 }
131 }
132 }
133 }
134 }
135
136 if let Some(ch) = state.peek() {
137 if ch == 'e' || ch == 'E' {
138 state.advance(1);
139 is_float = true;
140 if let Some(ch) = state.peek() {
141 if ch == '+' || ch == '-' {
142 state.advance(1);
143 }
144 }
145 while let Some(ch) = state.peek() {
146 if ch.is_ascii_digit() {
147 state.advance(ch.len_utf8());
148 }
149 else {
150 break;
151 }
152 }
153 }
154 }
155
156 let kind = if is_float { VampireTokenType::RealLiteral } else { VampireTokenType::IntegerLiteral };
157 state.add_token(kind, start, state.get_position());
158 true
159 }
160
161 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
163 let start = state.get_position();
164 if let Some(ch) = state.peek() {
165 if ch.is_ascii_alphabetic() || ch == '_' || ch == '$' {
166 state.advance(ch.len_utf8());
167 while let Some(ch) = state.peek() {
168 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' { state.advance(ch.len_utf8()) } else { break }
169 }
170
171 let end = state.get_position();
172 let text = state.get_text_in((start..end).into());
173 let kind = self.keyword_or_identifier(text.as_ref());
174 state.add_token(kind, start, end);
175 true
176 }
177 else {
178 false
179 }
180 }
181 else {
182 false
183 }
184 }
185
186 fn keyword_or_identifier(&self, text: &str) -> VampireTokenType {
188 match text {
189 "fof" => VampireTokenType::FofKw,
191 "cnf" => VampireTokenType::CnfKw,
192 "tff" => VampireTokenType::TffKw,
193 "thf" => VampireTokenType::ThfKw,
194 "tpi" => VampireTokenType::TpiKw,
195 "include" => VampireTokenType::IncludeKw,
196
197 "axiom" => VampireTokenType::AxiomKw,
199 "hypothesis" => VampireTokenType::HypothesisKw,
200 "definition" => VampireTokenType::DefinitionKw,
201 "assumption" => VampireTokenType::AssumptionKw,
202 "lemma" => VampireTokenType::LemmaKw,
203 "theorem" => VampireTokenType::TheoremKw,
204 "conjecture" => VampireTokenType::ConjectureKw,
205 "negated_conjecture" => VampireTokenType::NegatedConjectureKw,
206 "plain" => VampireTokenType::PlainKw,
207 "type" => VampireTokenType::TypeKw,
208 "fi_domain" => VampireTokenType::FiDomainKw,
209 "fi_functors" => VampireTokenType::FiFunctorsKw,
210 "fi_predicates" => VampireTokenType::FiPredicatesKw,
211 "unknown" => VampireTokenType::UnknownKw,
212
213 "!" => VampireTokenType::ForallKw,
215 "?" => VampireTokenType::ExistsKw,
216 "&" => VampireTokenType::AndKw,
217 "|" => VampireTokenType::OrKw,
218 "~" => VampireTokenType::NotKw,
219 "=>" => VampireTokenType::ImpliesKw,
220 "<=>" => VampireTokenType::IffKw,
221 "<~>" => VampireTokenType::XorKw,
222 "~|" => VampireTokenType::NorKw,
223 "~&" => VampireTokenType::NandKw,
224
225 "$o" => VampireTokenType::BoolKw,
227 "$i" => VampireTokenType::IndividualKw,
228 "$int" => VampireTokenType::IntKw,
229 "$real" => VampireTokenType::RealKw,
230 "$rat" => VampireTokenType::RatKw,
231 "$tType" => VampireTokenType::TTypeKw,
232 "$oType" => VampireTokenType::OTypeKw,
233 "$iType" => VampireTokenType::ITypeKw,
234
235 "$true" => VampireTokenType::BoolLiteral,
237 "$false" => VampireTokenType::BoolLiteral,
238
239 _ => VampireTokenType::Identifier,
240 }
241 }
242
243 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
245 let start = state.get_position();
246 let rest = state.rest();
247
248 let (kind, len) = if rest.starts_with("==>") {
249 (VampireTokenType::ImpliesKw, 3)
250 }
251 else if rest.starts_with("<=>") {
252 (VampireTokenType::IffKw, 3)
253 }
254 else if rest.starts_with("<~>") {
255 (VampireTokenType::XorKw, 3)
256 }
257 else if rest.starts_with("~|") {
258 (VampireTokenType::NorKw, 2)
259 }
260 else if rest.starts_with("~&") {
261 (VampireTokenType::NandKw, 2)
262 }
263 else if rest.starts_with("==") {
264 (VampireTokenType::DoubleEq, 2)
265 }
266 else if rest.starts_with("!=") {
267 (VampireTokenType::NotEq, 2)
268 }
269 else if rest.starts_with("<=") {
270 (VampireTokenType::LessEq, 2)
271 }
272 else if rest.starts_with(">=") {
273 (VampireTokenType::GreaterEq, 2)
274 }
275 else if rest.starts_with("&&") {
276 (VampireTokenType::AndAnd, 2)
277 }
278 else if rest.starts_with("||") {
279 (VampireTokenType::OrOr, 2)
280 }
281 else if rest.starts_with("++") {
282 (VampireTokenType::PlusPlus, 2)
283 }
284 else if rest.starts_with("--") {
285 (VampireTokenType::MinusMinus, 2)
286 }
287 else if rest.starts_with("+=") {
288 (VampireTokenType::PlusEq, 2)
289 }
290 else if rest.starts_with("-=") {
291 (VampireTokenType::MinusEq, 2)
292 }
293 else if rest.starts_with("*=") {
294 (VampireTokenType::StarEq, 2)
295 }
296 else if rest.starts_with("/=") {
297 (VampireTokenType::SlashEq, 2)
298 }
299 else if rest.starts_with("%=") {
300 (VampireTokenType::PercentEq, 2)
301 }
302 else if rest.starts_with("<<") {
303 (VampireTokenType::LeftShift, 2)
304 }
305 else if rest.starts_with(">>") {
306 (VampireTokenType::RightShift, 2)
307 }
308 else if rest.starts_with("->") {
309 (VampireTokenType::Arrow, 2)
310 }
311 else {
312 return false;
313 };
314
315 state.advance(len);
316 state.add_token(kind, start, state.get_position());
317 true
318 }
319
320 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
322 if let Some(ch) = state.peek() {
323 let start = state.get_position();
324 let kind = match ch {
325 '(' => Some(VampireTokenType::LeftParen),
326 ')' => Some(VampireTokenType::RightParen),
327 '[' => Some(VampireTokenType::LeftBracket),
328 ']' => Some(VampireTokenType::RightBracket),
329 '{' => Some(VampireTokenType::LeftBrace),
330 '}' => Some(VampireTokenType::RightBrace),
331 ':' => Some(VampireTokenType::Colon),
332 ';' => Some(VampireTokenType::Semicolon),
333 '.' => Some(VampireTokenType::Dot),
334 ',' => Some(VampireTokenType::Comma),
335 '?' => Some(VampireTokenType::Question),
336 '!' => Some(VampireTokenType::Bang),
337 '@' => Some(VampireTokenType::At),
338 '#' => Some(VampireTokenType::Hash),
339 '$' => Some(VampireTokenType::Dollar),
340 '%' => Some(VampireTokenType::Percent),
341 '^' => Some(VampireTokenType::Caret),
342 '&' => Some(VampireTokenType::Ampersand),
343 '*' => Some(VampireTokenType::Star),
344 '+' => Some(VampireTokenType::Plus),
345 '-' => Some(VampireTokenType::Minus),
346 '=' => Some(VampireTokenType::Eq),
347 '<' => Some(VampireTokenType::LessThan),
348 '>' => Some(VampireTokenType::GreaterThan),
349 '/' => Some(VampireTokenType::Slash),
350 '\\' => Some(VampireTokenType::Backslash),
351 '|' => Some(VampireTokenType::Pipe),
352 '~' => Some(VampireTokenType::Tilde),
353 _ => None,
354 };
355
356 if let Some(token_kind) = kind {
357 state.advance(ch.len_utf8());
358 state.add_token(token_kind, start, state.get_position());
359 true
360 }
361 else {
362 false
363 }
364 }
365 else {
366 false
367 }
368 }
369}