use itertools::Itertools;
use crate::lexicon::SmtlibParse;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, logos::Logos)]
pub(crate) enum Token {
#[token("(")]
LParen,
#[token(")")]
RParen,
#[regex("0|([1-9][0-9]*)", priority = 2)]
Numeral,
#[regex(r"(0|([1-9][0-9]*))\.[0-9]+", priority = 2)]
Decimal,
#[regex("#x[0-9a-fA-F]+")]
Hexadecimal,
#[regex("#b[01]+")]
Binary,
#[regex(r#"("[^"]*")+"#)]
String,
#[token("_", priority = 2)]
#[token("!", priority = 2)]
#[token("as")]
#[token("BINARY")]
#[token("DECIMAL")]
#[token("exists")]
#[token("forall")]
#[token("HEXADECIMAL")]
#[token("let")]
#[token("match")]
#[token("NUMERAL")]
#[token("par")]
#[token("STRING")]
#[token("assert")]
#[token("check-sat")]
#[token("check-sat-assuming")]
#[token("declare-const")]
#[token("declare-datatype")]
#[token("declare-datatypes")]
#[token("declare-fun")]
#[token("declare-sort")]
#[token("define-fun")]
#[token("define-fun-rec")]
#[token("define-sort")]
#[token("echo")]
#[token("exit")]
#[token("get-assertions")]
#[token("get-assignment")]
#[token("get-info")]
#[token("get-model")]
#[token("get-option")]
#[token("get-proof")]
#[token("get-unsat-assumptions")]
#[token("get-unsat-core")]
#[token("get-value")]
#[token("pop")]
#[token("push")]
#[token("reset")]
#[token("reset-assertions")]
#[token("set-info")]
#[token("set-logic")]
#[token("set-option")]
Reserved,
#[regex(r"[~!@$%^&*_\-+=<>.?/a-zA-Z0-9]+", priority = 1)]
#[regex(r"\|[^|\\]+\|")]
Symbol,
#[regex(r":[~!@$%^&*_\-+=<>.?/a-zA-Z0-9]+")]
Keyword,
#[error]
#[regex(r"[\t\n\r ]+", logos::skip)]
#[regex(r";[^\n]*", logos::skip)]
Error,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SourceSpan {
offset: usize,
length: usize,
}
impl SourceSpan {
#[must_use]
pub fn offset(&self) -> usize {
self.offset
}
#[must_use]
pub fn len(&self) -> usize {
self.length
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[must_use]
pub fn join(&self, span: SourceSpan) -> SourceSpan {
let offset = self.offset.min(span.offset);
let end = self.end().max(span.end());
let length = end - offset;
SourceSpan { offset, length }
}
#[must_use]
pub fn end(&self) -> usize {
self.offset + self.length
}
#[must_use]
pub fn contains(&self, byte_offset: usize) -> bool {
self.offset() <= byte_offset && byte_offset < self.end()
}
#[must_use]
pub fn union(
init: SourceSpan,
span: impl IntoIterator<Item = Option<SourceSpan>>,
) -> SourceSpan {
span.into_iter()
.fold(init, |a, b| b.map(|b| a.join(b)).unwrap_or(a))
}
}
impl From<SourceSpan> for miette::SourceSpan {
fn from(s: SourceSpan) -> Self {
Self::new(s.offset.into(), s.length.into())
}
}
impl From<(usize, usize)> for SourceSpan {
fn from((offset, length): (usize, usize)) -> Self {
SourceSpan { offset, length }
}
}
#[derive(Debug, Clone, Copy, Eq)]
pub(crate) struct Spanned<T>(pub(crate) T, pub(crate) SourceSpan);
impl<S> From<Spanned<S>> for SourceSpan {
fn from(value: Spanned<S>) -> Self {
value.1
}
}
impl<S> From<Spanned<S>> for miette::SourceSpan {
fn from(value: Spanned<S>) -> Self {
value.1.into()
}
}
impl<S: std::fmt::Display> std::fmt::Display for Spanned<S> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl<T: PartialEq> PartialEq for Spanned<T> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<T: std::hash::Hash> std::hash::Hash for Spanned<T> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.hash(state);
}
}
#[derive(Debug, thiserror::Error, miette::Diagnostic, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[error("{error}")]
pub struct ParseError {
error: String,
#[source_code]
src: String,
#[label("{}", label.as_ref().map(|s| s.as_str()).unwrap_or_else(|| "here"))]
err_span: SourceSpan,
label: Option<String>,
#[help]
help: Option<String>,
}
#[derive(Debug)]
pub(crate) struct Parser<'src> {
src: &'src str,
cursor: usize,
lexer: Vec<(Token, SourceSpan)>,
errors: Vec<ParseError>,
}
impl<'src> Parser<'src> {
pub(crate) fn new(src: &'src str) -> Self {
let lexer = logos::Lexer::<'src, Token>::new(src)
.spanned()
.map(|(t, r)| (t, SourceSpan::from((r.start, r.len()))))
.collect_vec();
Parser {
src,
cursor: 0,
lexer,
errors: vec![],
}
}
pub(crate) fn current(&self) -> Token {
self.nth(0)
}
pub(crate) fn nth_span(&self, n: usize) -> SourceSpan {
self.nth_raw(n).1
}
pub(crate) fn current_span(&self) -> SourceSpan {
self.nth_span(0)
}
pub(crate) fn nth_str(&self, n: usize) -> &'src str {
let span = self.nth_span(n);
&self.src[span.offset()..span.end()]
}
pub(crate) fn current_str(&self) -> &'src str {
self.nth_str(0)
}
#[allow(unused)]
pub(crate) fn current_src_str(&self) -> Spanned<&'src str> {
let span = self.current_span();
Spanned(self.current_str(), span)
}
pub(crate) fn nth_raw(&self, n: usize) -> (Token, SourceSpan) {
if self.cursor + n >= self.lexer.len() {
(Token::Error, SourceSpan::from((self.src.len(), 0)))
} else {
self.lexer[self.cursor + n]
}
}
pub(crate) fn nth(&self, n: usize) -> Token {
self.nth_raw(n).0
}
#[allow(unused)]
pub(crate) fn eoi(&self) -> bool {
self.cursor >= self.lexer.len()
}
#[allow(unused)]
pub(crate) fn pos(&self) -> usize {
self.cursor
}
pub(crate) fn bump(&mut self) {
self.cursor += 1
}
pub(crate) fn nth_matches(&self, n: usize, t: Token, s: &str) -> bool {
self.nth(n) == t && self.nth_str(n) == s
}
pub(crate) fn expect_matches(&mut self, t: Token, s: &str) -> Result<&'src str, ParseError> {
if self.nth_matches(0, t, s) {
let s = self.current_str();
self.bump();
Ok(s)
} else {
todo!()
}
}
pub(crate) fn expect(&mut self, t: Token) -> Result<&'src str, ParseError> {
match self.current() {
found if found == t => {
let s = self.current_str();
self.bump();
Ok(s)
}
found => {
let err = ParseError {
error: "Unrecognized token".into(),
src: self.src.into(),
err_span: self.current_span(),
label: Some(format!("Found {found:?} expected {t:?}")),
help: None,
};
self.errors.push(err.clone());
self.bump();
Err(err)
}
}
}
pub(crate) fn any<T: SmtlibParse>(&mut self) -> Result<Vec<T>, ParseError> {
let mut res = vec![];
while T::is_start_of(0, self) {
res.push(T::parse(self)?);
}
Ok(res)
}
pub(crate) fn non_zero<T: SmtlibParse>(&mut self) -> Result<Vec<T>, ParseError> {
let mut res = vec![T::parse(self)?];
while T::is_start_of(0, self) {
res.push(T::parse(self)?);
}
Ok(res)
}
pub(crate) fn n_plus_one<T: SmtlibParse>(&mut self) -> Result<Vec<T>, ParseError> {
self.non_zero()
}
pub(crate) fn stuck(&self, parsing: &str) -> ParseError {
ParseError {
error: format!("Parser stuck while parsing {parsing:?}"),
src: self.src.into(),
err_span: self.current_span(),
label: Some("The parser got stuck at this token".to_string()),
help: None,
}
}
}