1use crate::{parser::Token, Decimal, Numeral};
7use num::Num;
8
9#[derive(Debug, Clone, Default, Eq, PartialEq)]
11pub struct Position {
12 pub path: Option<String>,
13 pub line: usize,
14 pub column: usize,
15}
16
17impl Position {
18 pub fn new(path: Option<String>, line: usize, column: usize) -> Self {
19 Self { path, line, column }
20 }
21}
22
23impl std::fmt::Display for Position {
24 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25 match &self.path {
26 Some(path) => write!(f, "{}:{}:{}", path, self.line, self.column),
27 None => write!(f, "{}:{}", self.line, self.column),
28 }
29 }
30}
31
32pub(crate) struct Lexer<R> {
33 reader: R,
34 reserved_words: Vec<Token>,
35 reserved_words_map: fst::Map<Vec<u8>>,
36 current_offset: usize,
37 current_line: usize,
38 current_column: usize,
39}
40
41const KEYWORDS: &[(&str, Token)] = {
42 use Token::*;
43 &[
44 ("_", Underscore),
45 ("!", Exclam),
46 ("as", As),
47 ("let", Let),
48 ("exists", Exists),
49 ("forall", Forall),
50 ("match", Match),
51 ("par", Par),
52 ("assert", Assert),
53 ("check-sat", CheckSat),
54 ("check-sat-assuming", CheckSatAssuming),
55 ("declare-const", DeclareConst),
56 ("declare-datatype", DeclareDatatype),
57 ("declare-datatypes", DeclareDatatypes),
58 ("declare-fun", DeclareFun),
59 ("declare-sort", DeclareSort),
60 ("define-fun", DefineFun),
61 ("define-fun-rec", DefineFunRec),
62 ("define-funs-rec", DefineFunsRec),
63 ("define-sort", DefineSort),
64 ("echo", Echo),
65 ("exit", Exit),
66 ("get-assertions", GetAssertions),
67 ("get-assignment", GetAssignment),
68 ("get-info", GetInfo),
69 ("get-model", GetModel),
70 ("get-option", GetOption),
71 ("get-proof", GetProof),
72 ("get-unsat-assumptions", GetUnsatAssumptions),
73 ("get-unsat-core", GetUnsatCore),
74 ("get-value", GetValue),
75 ("pop", Pop),
76 ("push", Push),
77 ("reset", Reset),
78 ("reset-assertions", ResetAssertions),
79 ("set-info", SetInfo),
80 ("set-logic", SetLogic),
81 ("set-option", SetOption),
82 ]
83};
84
85impl<R> Lexer<R>
86where
87 R: std::io::BufRead,
88{
89 pub(crate) fn new(reader: R) -> Self {
90 let (reserved_words, reserved_words_map) = Self::make_reserved_words();
91 Self {
92 reader,
93 reserved_words,
94 reserved_words_map,
95 current_offset: 0,
96 current_line: 0,
97 current_column: 0,
98 }
99 }
100
101 #[inline]
102 fn lookup_symbol(&self, s: &[u8]) -> Option<&Token> {
103 self.reserved_words_map
104 .get(s)
105 .map(|index| &self.reserved_words[index as usize])
106 }
107
108 fn make_reserved_words() -> (Vec<Token>, fst::Map<Vec<u8>>) {
109 let mut keywords = KEYWORDS.to_vec();
110 keywords.sort_by_key(|(key, _)| key.to_string());
111 let mut words = Vec::new();
112 for (_, token) in &keywords {
113 words.push(token.clone());
114 }
115 let map = fst::Map::from_iter(
116 keywords
117 .iter()
118 .enumerate()
119 .map(|(index, (key, _))| (key, index as u64)),
120 )
121 .expect("Failed to create reserved token map");
122 (words, map)
123 }
124
125 #[cfg(test)]
126 pub(crate) fn current_offset(&self) -> usize {
127 self.current_offset
128 }
129
130 #[cfg(test)]
131 pub(crate) fn current_column(&self) -> usize {
132 self.current_column
133 }
134
135 #[cfg(test)]
136 pub(crate) fn current_line(&self) -> usize {
137 self.current_line
138 }
139
140 #[inline]
141 pub(crate) fn update_position(&self, pos: &mut Position) {
142 pos.line = self.current_line + 1;
143 pos.column = self.current_column + 1;
144 }
145
146 fn consume_byte(&mut self) {
147 if let Some(c) = self.peek_byte() {
148 if *c == b'\n' {
149 self.current_line += 1;
150 self.current_column = 0;
151 } else {
152 self.current_column += 1;
153 }
154 self.current_offset += 1;
155 self.reader.consume(1)
156 }
157 }
158
159 fn read_byte(&mut self) -> Option<u8> {
160 let c = self.peek_byte().cloned();
161 self.consume_byte();
162 c
163 }
164
165 #[inline]
166 fn peek_bytes(&mut self) -> &[u8] {
167 self.reader
168 .fill_buf()
169 .expect("Error while reading input stream")
170 }
171
172 fn peek_byte(&mut self) -> Option<&u8> {
173 self.peek_bytes().get(0)
174 }
175
176 fn skip_whitespace(&mut self) -> bool {
177 match self.peek_byte() {
178 Some(b) if matches!(b, b' ' | b'\n' | b'\t' | b'\r') => {
179 self.consume_byte();
180 true
181 }
182 _ => false,
183 }
184 }
185
186 fn skip_comment(&mut self) -> bool {
187 match self.peek_byte() {
188 Some(c) if *c == b';' => {
189 self.consume_byte();
190 while let Some(c) = self.read_byte() {
191 if c == b'\n' {
192 break;
193 }
194 }
195 true
196 }
197 _ => false,
198 }
199 }
200}
201
202impl<R> Iterator for Lexer<R>
203where
204 R: std::io::BufRead,
205{
206 type Item = Token;
207
208 fn next(&mut self) -> Option<Token> {
209 while self.skip_whitespace() || self.skip_comment() {}
210 match self.peek_byte() {
211 Some(b'(') => {
213 self.consume_byte();
214 Some(Token::LeftParen)
215 }
216 Some(b')') => {
217 self.consume_byte();
218 Some(Token::RightParen)
219 }
220 Some(b'|') => {
222 self.consume_byte();
223 let mut content = Vec::new();
224 while let Some(c) = self.read_byte() {
225 if c == b'|' {
226 return match String::from_utf8(content) {
227 Ok(s) => Some(Token::Symbol(s)),
228 Err(_) => None,
229 };
230 }
231 content.push(c);
232 }
233 None
235 }
236 Some(b'"') => {
238 self.consume_byte();
239 let mut content = Vec::new();
240 while let Some(c) = self.read_byte() {
241 if c == b'"' {
242 if let Some(d) = self.peek_byte() {
243 if *d == b'"' {
244 self.consume_byte();
245 content.push(b'"');
246 continue;
247 }
248 }
249 return match String::from_utf8(content) {
250 Ok(s) => Some(Token::String(s)),
251 Err(_) => None,
252 };
253 }
254 content.push(c);
255 }
256 None
258 }
259 Some(b'#') => {
261 self.consume_byte();
262 match self.peek_byte() {
263 Some(b'b') => {
264 self.consume_byte();
265 let mut content = Vec::new();
266 while let Some(c) = self.peek_byte() {
267 match c {
268 b'0' => content.push(false),
269 b'1' => content.push(true),
270 _ => break,
271 }
272 self.consume_byte();
273 }
274 if content.is_empty() {
275 None
276 } else {
277 Some(Token::Binary(content))
278 }
279 }
280 Some(b'x') => {
281 self.consume_byte();
282 let mut content = Vec::new();
283 while let Some(c) = self.peek_byte() {
284 match c {
285 b'0'..=b'9' => content.push(c - b'0'),
286 b'a'..=b'f' => content.push(c - b'a' + 10),
287 b'A'..=b'F' => content.push(c - b'A' + 10),
288 _ => break,
289 }
290 self.consume_byte();
291 }
292 if content.is_empty() {
293 None
294 } else {
295 Some(Token::Hexadecimal(content))
296 }
297 }
298 _ => None,
299 }
300 }
301 Some(digit @ b'0'..=b'9') => {
303 let mut numerator = vec![*digit];
304 self.consume_byte();
305 while let Some(c) = self.peek_byte() {
306 if !is_digit_byte(*c) {
307 break;
308 }
309 numerator.push(*c);
310 self.consume_byte();
311 }
312 if numerator.len() > 1 && numerator.starts_with(b"0") {
313 return None;
314 }
315 let numerator = String::from_utf8(numerator).unwrap();
316 match self.peek_byte() {
317 Some(b'.') => {
318 self.consume_byte();
319 let mut denumerator = Vec::new();
320 while let Some(c) = self.peek_byte() {
321 if !is_digit_byte(*c) {
322 break;
323 }
324 denumerator.push(*c);
325 self.consume_byte();
326 }
327 if denumerator.is_empty() {
328 return None;
329 }
330 let denumerator = String::from_utf8(denumerator).unwrap();
331 let num =
332 num::BigInt::from_str_radix(&(numerator + &denumerator), 10).ok()?;
333 let denom = num::BigInt::from(10u32).pow(denumerator.len() as u32);
334 Some(Token::Decimal(Decimal::new(num, denom)))
335 }
336 _ => Some(Token::Numeral(
337 Numeral::from_str_radix(&numerator, 10).ok()?,
338 )),
339 }
340 }
341 Some(b':') => {
343 self.consume_byte();
344 let mut content = Vec::new();
345 while let Some(c) = self.peek_byte() {
346 if !is_symbol_byte(*c) {
347 break;
348 }
349 content.push(*c);
350 self.consume_byte();
351 }
352 match String::from_utf8(content) {
353 Ok(s) => Some(Token::Keyword(s)),
354 Err(_) => None,
355 }
356 }
357 Some(c) if is_non_digit_symbol_byte(*c) => {
359 let mut content = vec![*c];
360 self.consume_byte();
361 while let Some(c) = self.peek_byte() {
362 if !is_symbol_byte(*c) {
363 break;
364 }
365 content.push(*c);
366 self.consume_byte();
367 }
368 match self.lookup_symbol(&content) {
369 Some(token) => Some(token.clone()),
370 None => match String::from_utf8(content) {
371 Ok(s) => Some(Token::Symbol(s)),
372 Err(_) => None,
373 },
374 }
375 }
376 _ => None,
378 }
379 }
380}
381
382fn is_digit_byte(c: u8) -> bool {
383 matches!(c, b'0'..=b'9')
384}
385
386pub(crate) fn is_symbol_byte(c: u8) -> bool {
387 is_digit_byte(c) || is_non_digit_symbol_byte(c)
388}
389
390pub(crate) fn is_non_digit_symbol_byte(c: u8) -> bool {
391 matches!(c,
392 b'a'..=b'z'
393 | b'A'..=b'Z'
394 | b'~'
395 | b'!'
396 | b'@'
397 | b'$'
398 | b'%'
399 | b'^'
400 | b'&'
401 | b'*'
402 | b'_'
403 | b'-'
404 | b'+'
405 | b'='
406 | b'<'
407 | b'>'
408 | b'.'
409 | b'?'
410 | b'/')
411}
412
413#[test]
414fn test_spaces() {
415 let input = b"xx \n asd ";
416 let mut lexer = Lexer::new(&input[..]);
417 let tokens: Vec<_> = (&mut lexer).collect();
418 assert_eq!(
419 tokens,
420 vec![Token::Symbol("xx".into()), Token::Symbol("asd".into())]
421 );
422 assert_eq!(lexer.current_line(), 1);
423 assert_eq!(lexer.current_column(), 5);
424 assert_eq!(lexer.current_offset(), input.len());
425}
426
427#[test]
428fn test_error() {
429 let input = b"xx \\";
430 let mut lexer = Lexer::new(&input[..]);
431 assert_eq!(
432 (&mut lexer).collect::<Vec<_>>(),
433 vec![Token::Symbol("xx".into())]
434 );
435 assert_eq!(lexer.current_line(), 0);
436 assert_eq!(lexer.current_column(), input.len() - 1);
437 assert_eq!(lexer.current_offset(), input.len() - 1);
438}
439
440#[test]
441fn test_literals() {
442 let lexer = Lexer::new(&b"135"[..]);
443 assert_eq!(
444 lexer.collect::<Vec<_>>(),
445 vec![Token::Numeral(Numeral::from(135u32))]
446 );
447
448 let lexer = Lexer::new(&b"0"[..]);
449 assert_eq!(
450 lexer.collect::<Vec<_>>(),
451 vec![Token::Numeral(Numeral::from(0u32))]
452 );
453
454 let lexer = Lexer::new(&b"(0 59)"[..]);
455 assert_eq!(
456 lexer.collect::<Vec<_>>(),
457 vec![
458 Token::LeftParen,
459 Token::Numeral(Numeral::from(0u32)),
460 Token::Numeral(Numeral::from(59u32)),
461 Token::RightParen
462 ]
463 );
464
465 let lexer = Lexer::new(&b"135"[..]);
466 assert_eq!(
467 lexer.collect::<Vec<_>>(),
468 vec![Token::Numeral(Numeral::from(135u32))]
469 );
470
471 let lexer = Lexer::new(&b"135.2"[..]);
472 assert_eq!(
473 lexer.collect::<Vec<_>>(),
474 vec![Token::Decimal(Decimal::from((
475 1352u32.into(),
476 10u32.into()
477 )))]
478 );
479
480 let mut lexer = Lexer::new(&b"0135"[..]);
481 assert!(lexer.next().is_none());
482
483 let mut lexer = Lexer::new(&b"135."[..]);
484 assert!(lexer.next().is_none());
485
486 let lexer = Lexer::new(&b"#b101"[..]);
487 assert_eq!(
488 lexer.collect::<Vec<_>>(),
489 vec![Token::Binary(vec![true, false, true])]
490 );
491
492 let lexer = Lexer::new(&b"#x1Ae"[..]);
493 assert_eq!(
494 lexer.collect::<Vec<_>>(),
495 vec![Token::Hexadecimal(vec![1, 10, 14])]
496 );
497
498 let lexer = Lexer::new(&br#""acv""12""#[..]);
499 assert_eq!(
500 lexer.collect::<Vec<_>>(),
501 vec![Token::String("acv\"12".into())]
502 );
503
504 let lexer = Lexer::new(&br#""acv12""#[..]);
505 assert_eq!(
506 lexer.collect::<Vec<_>>(),
507 vec![Token::String("acv12".into())]
508 );
509}
510
511#[test]
512fn test_symbol() {
513 let lexer = Lexer::new(&b"A$@135"[..]);
514 assert_eq!(
515 lexer.collect::<Vec<_>>(),
516 vec![Token::Symbol("A$@135".into())]
517 );
518
519 let lexer = Lexer::new(&b"|135|"[..]);
520 assert_eq!(lexer.collect::<Vec<_>>(), vec![Token::Symbol("135".into())]);
521}
522
523#[test]
524fn test_keyword() {
525 let lexer = Lexer::new(&b":A$@135"[..]);
526 assert_eq!(
527 lexer.collect::<Vec<_>>(),
528 vec![Token::Keyword("A$@135".into())]
529 );
530}
531
532#[test]
533fn test_token_size() {
534 assert_eq!(std::mem::size_of::<Token>(), 72);
535 assert_eq!(std::mem::size_of::<Box<Token>>(), 8);
536}