1#![doc = include_str!("readme.md")]
2pub mod token_type;
4pub use token_type::VampireTokenType;
5
6use crate::language::VampireLanguage;
7use oak_core::{
8 Lexer, LexerCache, LexerState, OakError, TextEdit,
9 lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
10 source::Source,
11};
12use std::sync::LazyLock;
13
14type State<'s, S> = LexerState<'s, S, VampireLanguage>;
15
16static VAMPIRE_WHITESPACE: LazyLock<WhitespaceConfig> = LazyLock::new(|| WhitespaceConfig { unicode_whitespace: true });
17static VAMPIRE_COMMENT: LazyLock<CommentConfig> = LazyLock::new(|| CommentConfig { line_marker: "%", block_start: "/*", block_end: "*/", nested_blocks: false });
18static VAMPIRE_STRING: LazyLock<StringConfig> = LazyLock::new(|| StringConfig { quotes: &['"'], escape: Some('\\') });
19
20#[derive(Clone)]
22pub struct VampireLexer<'config> {
23 config: &'config VampireLanguage,
24}
25
26impl<'config> Lexer<VampireLanguage> for VampireLexer<'config> {
27 fn lex<'a, S: Source + ?Sized>(&self, text: &'a S, _edits: &[TextEdit], _cache: &'a mut impl LexerCache<VampireLanguage>) -> LexOutput<VampireLanguage> {
29 let mut state: State<'_, S> = LexerState::new(text);
30 let result = self.run(&mut state);
31 if result.is_ok() {
32 state.add_eof();
33 }
34 state.finish(result)
35 }
36}
37impl<'config> VampireLexer<'config> {
38 pub fn new(config: &'config VampireLanguage) -> Self {
40 Self { config }
41 }
42
43 fn run<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> Result<(), OakError> {
45 while state.not_at_end() {
46 let safe_point = state.get_position();
47
48 if self.skip_whitespace(state) {
49 continue;
50 }
51
52 if self.skip_comment(state) {
53 continue;
54 }
55
56 if self.lex_string_literal(state) {
57 continue;
58 }
59
60 if self.lex_number_literal(state) {
61 continue;
62 }
63
64 if self.lex_identifier_or_keyword(state) {
65 continue;
66 }
67
68 if self.lex_operators(state) {
69 continue;
70 }
71
72 if self.lex_single_char_tokens(state) {
73 continue;
74 }
75
76 state.advance_if_dead_lock(safe_point);
77 }
78
79 Ok(())
80 }
81
82 fn skip_whitespace<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
84 VAMPIRE_WHITESPACE.scan(state, VampireTokenType::Whitespace)
85 }
86
87 fn skip_comment<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
89 VAMPIRE_COMMENT.scan(state, VampireTokenType::LineComment, VampireTokenType::BlockComment)
90 }
91
92 fn lex_string_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
94 VAMPIRE_STRING.scan(state, VampireTokenType::StringLiteral)
95 }
96
97 fn lex_number_literal<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
99 let start = state.get_position();
100 let mut has_digit = false;
101
102 if state.peek() == Some('-') {
104 state.advance(1);
105 }
106
107 while let Some(ch) = state.peek() {
108 if ch.is_ascii_digit() {
109 state.advance(ch.len_utf8());
110 has_digit = true;
111 }
112 else {
113 break;
114 }
115 }
116
117 if !has_digit {
118 return false;
119 }
120
121 let mut is_float = false;
123 if state.peek() == Some('.') {
124 if let Some(next) = state.peek_next_n(1) {
125 if next.is_ascii_digit() {
126 state.advance(1); is_float = true;
128 while let Some(ch) = state.peek() {
129 if ch.is_ascii_digit() {
130 state.advance(ch.len_utf8());
131 }
132 else {
133 break;
134 }
135 }
136 }
137 }
138 }
139
140 if let Some(ch) = state.peek() {
141 if ch == 'e' || ch == 'E' {
142 state.advance(1);
143 is_float = true;
144 if let Some(ch) = state.peek() {
145 if ch == '+' || ch == '-' {
146 state.advance(1);
147 }
148 }
149 while let Some(ch) = state.peek() {
150 if ch.is_ascii_digit() {
151 state.advance(ch.len_utf8());
152 }
153 else {
154 break;
155 }
156 }
157 }
158 }
159
160 let kind = if is_float { VampireTokenType::RealLiteral } else { VampireTokenType::IntegerLiteral };
161 state.add_token(kind, start, state.get_position());
162 true
163 }
164
165 fn lex_identifier_or_keyword<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
167 let start = state.get_position();
168 if let Some(ch) = state.peek() {
169 if ch.is_ascii_alphabetic() || ch == '_' || ch == '$' {
170 state.advance(ch.len_utf8());
171 while let Some(ch) = state.peek() {
172 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' { state.advance(ch.len_utf8()) } else { break }
173 }
174
175 let end = state.get_position();
176 let text = state.get_text_in((start..end).into());
177 let kind = self.keyword_or_identifier(text.as_ref());
178 state.add_token(kind, start, end);
179 true
180 }
181 else {
182 false
183 }
184 }
185 else {
186 false
187 }
188 }
189
190 fn keyword_or_identifier(&self, text: &str) -> VampireTokenType {
192 match text {
193 "fof" => VampireTokenType::FofKw,
195 "cnf" => VampireTokenType::CnfKw,
196 "tff" => VampireTokenType::TffKw,
197 "thf" => VampireTokenType::ThfKw,
198 "tpi" => VampireTokenType::TpiKw,
199 "include" => VampireTokenType::IncludeKw,
200
201 "axiom" => VampireTokenType::AxiomKw,
203 "hypothesis" => VampireTokenType::HypothesisKw,
204 "definition" => VampireTokenType::DefinitionKw,
205 "assumption" => VampireTokenType::AssumptionKw,
206 "lemma" => VampireTokenType::LemmaKw,
207 "theorem" => VampireTokenType::TheoremKw,
208 "conjecture" => VampireTokenType::ConjectureKw,
209 "negated_conjecture" => VampireTokenType::NegatedConjectureKw,
210 "plain" => VampireTokenType::PlainKw,
211 "type" => VampireTokenType::TypeKw,
212 "fi_domain" => VampireTokenType::FiDomainKw,
213 "fi_functors" => VampireTokenType::FiFunctorsKw,
214 "fi_predicates" => VampireTokenType::FiPredicatesKw,
215 "unknown" => VampireTokenType::UnknownKw,
216
217 "!" => VampireTokenType::ForallKw,
219 "?" => VampireTokenType::ExistsKw,
220 "&" => VampireTokenType::AndKw,
221 "|" => VampireTokenType::OrKw,
222 "~" => VampireTokenType::NotKw,
223 "=>" => VampireTokenType::ImpliesKw,
224 "<=>" => VampireTokenType::IffKw,
225 "<~>" => VampireTokenType::XorKw,
226 "~|" => VampireTokenType::NorKw,
227 "~&" => VampireTokenType::NandKw,
228
229 "$o" => VampireTokenType::BoolKw,
231 "$i" => VampireTokenType::IndividualKw,
232 "$int" => VampireTokenType::IntKw,
233 "$real" => VampireTokenType::RealKw,
234 "$rat" => VampireTokenType::RatKw,
235 "$tType" => VampireTokenType::TTypeKw,
236 "$oType" => VampireTokenType::OTypeKw,
237 "$iType" => VampireTokenType::ITypeKw,
238
239 "$true" => VampireTokenType::BoolLiteral,
241 "$false" => VampireTokenType::BoolLiteral,
242
243 _ => VampireTokenType::Identifier,
244 }
245 }
246
247 fn lex_operators<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
249 let start = state.get_position();
250 let rest = state.rest();
251
252 let (kind, len) = if rest.starts_with("==>") {
253 (VampireTokenType::ImpliesKw, 3)
254 }
255 else if rest.starts_with("<=>") {
256 (VampireTokenType::IffKw, 3)
257 }
258 else if rest.starts_with("<~>") {
259 (VampireTokenType::XorKw, 3)
260 }
261 else if rest.starts_with("~|") {
262 (VampireTokenType::NorKw, 2)
263 }
264 else if rest.starts_with("~&") {
265 (VampireTokenType::NandKw, 2)
266 }
267 else if rest.starts_with("==") {
268 (VampireTokenType::DoubleEq, 2)
269 }
270 else if rest.starts_with("!=") {
271 (VampireTokenType::NotEq, 2)
272 }
273 else if rest.starts_with("<=") {
274 (VampireTokenType::LessEq, 2)
275 }
276 else if rest.starts_with(">=") {
277 (VampireTokenType::GreaterEq, 2)
278 }
279 else if rest.starts_with("&&") {
280 (VampireTokenType::AndAnd, 2)
281 }
282 else if rest.starts_with("||") {
283 (VampireTokenType::OrOr, 2)
284 }
285 else if rest.starts_with("++") {
286 (VampireTokenType::PlusPlus, 2)
287 }
288 else if rest.starts_with("--") {
289 (VampireTokenType::MinusMinus, 2)
290 }
291 else if rest.starts_with("+=") {
292 (VampireTokenType::PlusEq, 2)
293 }
294 else if rest.starts_with("-=") {
295 (VampireTokenType::MinusEq, 2)
296 }
297 else if rest.starts_with("*=") {
298 (VampireTokenType::StarEq, 2)
299 }
300 else if rest.starts_with("/=") {
301 (VampireTokenType::SlashEq, 2)
302 }
303 else if rest.starts_with("%=") {
304 (VampireTokenType::PercentEq, 2)
305 }
306 else if rest.starts_with("<<") {
307 (VampireTokenType::LeftShift, 2)
308 }
309 else if rest.starts_with(">>") {
310 (VampireTokenType::RightShift, 2)
311 }
312 else if rest.starts_with("->") {
313 (VampireTokenType::Arrow, 2)
314 }
315 else {
316 return false;
317 };
318
319 state.advance(len);
320 state.add_token(kind, start, state.get_position());
321 true
322 }
323
324 fn lex_single_char_tokens<'s, S: Source + ?Sized>(&self, state: &mut State<'s, S>) -> bool {
326 if let Some(ch) = state.peek() {
327 let start = state.get_position();
328 let kind = match ch {
329 '(' => Some(VampireTokenType::LeftParen),
330 ')' => Some(VampireTokenType::RightParen),
331 '[' => Some(VampireTokenType::LeftBracket),
332 ']' => Some(VampireTokenType::RightBracket),
333 '{' => Some(VampireTokenType::LeftBrace),
334 '}' => Some(VampireTokenType::RightBrace),
335 ':' => Some(VampireTokenType::Colon),
336 ';' => Some(VampireTokenType::Semicolon),
337 '.' => Some(VampireTokenType::Dot),
338 ',' => Some(VampireTokenType::Comma),
339 '?' => Some(VampireTokenType::Question),
340 '!' => Some(VampireTokenType::Bang),
341 '@' => Some(VampireTokenType::At),
342 '#' => Some(VampireTokenType::Hash),
343 '$' => Some(VampireTokenType::Dollar),
344 '%' => Some(VampireTokenType::Percent),
345 '^' => Some(VampireTokenType::Caret),
346 '&' => Some(VampireTokenType::Ampersand),
347 '*' => Some(VampireTokenType::Star),
348 '+' => Some(VampireTokenType::Plus),
349 '-' => Some(VampireTokenType::Minus),
350 '=' => Some(VampireTokenType::Eq),
351 '<' => Some(VampireTokenType::LessThan),
352 '>' => Some(VampireTokenType::GreaterThan),
353 '/' => Some(VampireTokenType::Slash),
354 '\\' => Some(VampireTokenType::Backslash),
355 '|' => Some(VampireTokenType::Pipe),
356 '~' => Some(VampireTokenType::Tilde),
357 _ => None,
358 };
359
360 if let Some(token_kind) = kind {
361 state.advance(ch.len_utf8());
362 state.add_token(token_kind, start, state.get_position());
363 true
364 }
365 else {
366 false
367 }
368 }
369 else {
370 false
371 }
372 }
373}