use crate::chc::{ChcSystem, PredId, PredicateApp};
use oxiz_core::sort::SortId;
use oxiz_core::{TermId, TermManager};
use std::collections::HashMap;
use thiserror::Error;
/// A lexical token of the SMT-LIB 2 subset understood by this parser.
///
/// All payload variants carry owned `String`s, so the type is also `Eq`
/// (derived below), letting tokens be used in sets/maps and compared exactly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// A simple symbol such as `declare-fun` or `x`.
    Symbol(String),
    /// A `:keyword` attribute name (leading colon stripped by the lexer).
    Keyword(String),
    /// A `"..."` string literal (quotes stripped, escapes resolved).
    StringLit(String),
    /// An unsigned integer literal, kept as its source text.
    Numeral(String),
    /// A decimal literal such as `3.14`, kept as its source text.
    Decimal(String),
}
/// Character-level tokenizer for SMT-LIB 2 style input.
pub struct Lexer {
// Input buffered as chars so `pos` indexes code points, not bytes.
input: Vec<char>,
// Index of the next unread character in `input`.
pos: usize,
}
impl Lexer {
    /// Creates a lexer over `input`, buffering it as a `Vec<char>` so that
    /// positions index code points rather than bytes.
    pub fn new(input: &str) -> Self {
        Self {
            input: input.chars().collect(),
            pos: 0,
        }
    }

    /// Returns the next token, or `Ok(None)` at end of input.
    ///
    /// # Errors
    /// Returns `ParseError::InvalidSyntax` on a character that cannot start a
    /// token, or on an unterminated string literal.
    pub fn next_token(&mut self) -> Result<Option<Token>, ParseError> {
        self.skip_whitespace_and_comments()?;
        if self.pos >= self.input.len() {
            return Ok(None);
        }
        let ch = self.input[self.pos];
        match ch {
            '(' => {
                self.pos += 1;
                Ok(Some(Token::LParen))
            }
            ')' => {
                self.pos += 1;
                Ok(Some(Token::RParen))
            }
            '"' => self.read_string(),
            ':' => self.read_keyword(),
            // A leading digit always starts a numeral/decimal, so symbols can
            // never begin with a digit even though digits are symbol chars.
            '0'..='9' => self.read_number(),
            _ if Self::is_symbol_char(ch) => self.read_symbol(),
            _ => Err(ParseError::InvalidSyntax(format!(
                "unexpected character: '{}'",
                ch
            ))),
        }
    }

    /// Advances past whitespace and `;`-to-end-of-line comments.
    fn skip_whitespace_and_comments(&mut self) -> Result<(), ParseError> {
        while self.pos < self.input.len() {
            let ch = self.input[self.pos];
            if ch.is_whitespace() {
                self.pos += 1;
            } else if ch == ';' {
                // Comment runs to (but not past) the newline; the newline is
                // consumed on the next loop iteration as whitespace.
                while self.pos < self.input.len() && self.input[self.pos] != '\n' {
                    self.pos += 1;
                }
            } else {
                break;
            }
        }
        Ok(())
    }

    /// Consumes a `"..."` literal (opening quote at `self.pos`).
    /// A backslash escapes the following character verbatim.
    ///
    /// # Errors
    /// `ParseError::InvalidSyntax` if input ends before the closing quote.
    fn read_string(&mut self) -> Result<Option<Token>, ParseError> {
        self.pos += 1; // skip opening quote
        let mut s = String::new();
        while self.pos < self.input.len() {
            let ch = self.input[self.pos];
            if ch == '"' {
                self.pos += 1;
                return Ok(Some(Token::StringLit(s)));
            } else if ch == '\\' && self.pos + 1 < self.input.len() {
                self.pos += 1;
                s.push(self.input[self.pos]);
                self.pos += 1;
            } else {
                s.push(ch);
                self.pos += 1;
            }
        }
        Err(ParseError::InvalidSyntax(
            "unterminated string literal".to_string(),
        ))
    }

    /// Consumes a `:keyword`; the colon is stripped from the token payload.
    fn read_keyword(&mut self) -> Result<Option<Token>, ParseError> {
        self.pos += 1; // skip ':'
        let start = self.pos;
        while self.pos < self.input.len() && Self::is_symbol_char(self.input[self.pos]) {
            self.pos += 1;
        }
        let keyword: String = self.input[start..self.pos].iter().collect();
        Ok(Some(Token::Keyword(keyword)))
    }

    /// Consumes a numeral or (if it contains a single `.`) a decimal.
    fn read_number(&mut self) -> Result<Option<Token>, ParseError> {
        let start = self.pos;
        let mut has_dot = false;
        while self.pos < self.input.len() {
            let ch = self.input[self.pos];
            if ch.is_ascii_digit() {
                self.pos += 1;
            } else if ch == '.' && !has_dot {
                has_dot = true;
                self.pos += 1;
            } else {
                break;
            }
        }
        let number: String = self.input[start..self.pos].iter().collect();
        if has_dot {
            Ok(Some(Token::Decimal(number)))
        } else {
            Ok(Some(Token::Numeral(number)))
        }
    }

    /// Consumes a run of symbol characters as a `Token::Symbol`.
    fn read_symbol(&mut self) -> Result<Option<Token>, ParseError> {
        let start = self.pos;
        while self.pos < self.input.len() && Self::is_symbol_char(self.input[self.pos]) {
            self.pos += 1;
        }
        let symbol: String = self.input[start..self.pos].iter().collect();
        Ok(Some(Token::Symbol(symbol)))
    }

    /// Returns true for characters allowed inside an SMT-LIB 2 simple symbol:
    /// letters, digits, and the punctuation set
    /// `~ ! @ $ % ^ & * _ - + = < > . ? /` from the standard.
    /// (Previously `~ @ $ % ^ &` were rejected, so symbols such as `@query`
    /// raised "unexpected character"; accepting them is backward-compatible.)
    fn is_symbol_char(ch: char) -> bool {
        ch.is_alphanumeric()
            || matches!(
                ch,
                '_' | '-'
                    | '+'
                    | '*'
                    | '/'
                    | '<'
                    | '>'
                    | '='
                    | '!'
                    | '?'
                    | '.'
                    | '~'
                    | '@'
                    | '$'
                    | '%'
                    | '^'
                    | '&'
            )
    }

    /// Lexes the entire input into a token vector.
    ///
    /// # Errors
    /// Propagates the first lexical error encountered.
    pub fn tokenize(&mut self) -> Result<Vec<Token>, ParseError> {
        let mut tokens = Vec::new();
        while let Some(token) = self.next_token()? {
            tokens.push(token);
        }
        Ok(tokens)
    }
}
/// A parsed s-expression: either a single token or a parenthesized list.
#[derive(Debug, Clone, PartialEq)]
pub enum SExpr {
// A leaf token (symbol, keyword, or literal).
Atom(Token),
// A `( ... )` list of nested s-expressions.
List(Vec<SExpr>),
}
impl SExpr {
    /// Returns true when this expression is a parenthesized list.
    pub fn is_list(&self) -> bool {
        match self {
            SExpr::List(_) => true,
            SExpr::Atom(_) => false,
        }
    }

    /// Returns true when this expression is a single leaf token.
    pub fn is_atom(&self) -> bool {
        match self {
            SExpr::Atom(_) => true,
            SExpr::List(_) => false,
        }
    }

    /// Returns the symbol text if this is a `Symbol` atom, else `None`.
    pub fn as_symbol(&self) -> Option<&str> {
        if let SExpr::Atom(Token::Symbol(name)) = self {
            Some(name.as_str())
        } else {
            None
        }
    }

    /// Returns the child expressions if this is a list, else `None`.
    pub fn as_list(&self) -> Option<&[SExpr]> {
        if let SExpr::List(children) = self {
            Some(children.as_slice())
        } else {
            None
        }
    }
}
/// Recursive-descent parser from a token stream to `SExpr` trees.
pub struct SExprParser {
// The full token stream produced by `Lexer::tokenize`.
tokens: Vec<Token>,
// Index of the next unconsumed token.
pos: usize,
}
impl SExprParser {
    /// Creates a parser positioned at the start of `tokens`.
    pub fn new(tokens: Vec<Token>) -> Self {
        Self { tokens, pos: 0 }
    }

    /// Parses one s-expression starting at the current position.
    ///
    /// # Errors
    /// `ParseError::InvalidSyntax` at end of input, on a stray `)`, or on an
    /// unclosed `(`.
    pub fn parse_sexpr(&mut self) -> Result<SExpr, ParseError> {
        let token = self
            .tokens
            .get(self.pos)
            .cloned()
            .ok_or_else(|| ParseError::InvalidSyntax("unexpected end of input".to_string()))?;
        match token {
            Token::LParen => {
                self.pos += 1;
                let mut elements = Vec::new();
                loop {
                    match self.tokens.get(self.pos) {
                        Some(Token::RParen) => {
                            self.pos += 1;
                            return Ok(SExpr::List(elements));
                        }
                        Some(_) => elements.push(self.parse_sexpr()?),
                        None => {
                            return Err(ParseError::InvalidSyntax(
                                "unclosed parenthesis".to_string(),
                            ))
                        }
                    }
                }
            }
            Token::RParen => Err(ParseError::InvalidSyntax(
                "unexpected closing parenthesis".to_string(),
            )),
            atom => {
                self.pos += 1;
                Ok(SExpr::Atom(atom))
            }
        }
    }

    /// Parses every top-level s-expression until the stream is exhausted.
    pub fn parse_all(&mut self) -> Result<Vec<SExpr>, ParseError> {
        let mut forms = Vec::new();
        while self.pos < self.tokens.len() {
            forms.push(self.parse_sexpr()?);
        }
        Ok(forms)
    }

    /// Convenience: lexes and parses `input` in one call.
    pub fn parse_str(input: &str) -> Result<Vec<SExpr>, ParseError> {
        let tokens = Lexer::new(input).tokenize()?;
        let mut parser = SExprParser::new(tokens);
        parser.parse_all()
    }
}
/// Errors produced while lexing and parsing CHC input.
#[derive(Error, Debug)]
pub enum ParseError {
/// Malformed input at the lexical or s-expression level.
#[error("invalid syntax: {0}")]
InvalidSyntax(String),
/// Reference to a symbol (e.g. a predicate) that was never declared.
#[error("undefined symbol: {0}")]
UndefinedSymbol(String),
/// A literal or term that cannot be given a sensible type.
#[error("type error: {0}")]
TypeError(String),
/// Input that is valid SMT-LIB but not handled by this parser.
#[error("unsupported feature: {0}")]
Unsupported(String),
/// An underlying I/O failure (auto-converted via `#[from]`).
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
}
/// Parser that turns SMT-LIB 2 CHC scripts into a `ChcSystem`.
pub struct ChcParser<'a> {
// Term manager used to allocate terms while parsing.
#[allow(dead_code)]
terms: &'a mut TermManager,
// The CHC system being accumulated by commands / builder methods.
system: ChcSystem,
// Declared predicate names mapped to their ids.
predicates: HashMap<String, PredId>,
// Variables currently in scope (quantifier bindings plus free variables).
#[allow(dead_code)]
variables: HashMap<String, TermId>,
}
impl<'a> ChcParser<'a> {
pub fn new(terms: &'a mut TermManager) -> Self {
Self {
terms,
system: ChcSystem::new(),
predicates: HashMap::new(),
variables: HashMap::new(),
}
}
pub fn parse(&mut self, input: &str) -> Result<ChcSystem, ParseError> {
let mut lexer = Lexer::new(input);
let tokens = lexer.tokenize()?;
let mut parser = SExprParser::new(tokens);
let exprs = parser.parse_all()?;
for expr in exprs {
self.process_command(&expr)?;
}
Ok(std::mem::take(&mut self.system))
}
fn process_command(&mut self, expr: &SExpr) -> Result<(), ParseError> {
let items = expr
.as_list()
.ok_or_else(|| ParseError::InvalidSyntax("expected command as list".to_string()))?;
if items.is_empty() {
return Ok(());
}
let cmd = items[0]
.as_symbol()
.ok_or_else(|| ParseError::InvalidSyntax("expected command name".to_string()))?;
match cmd {
"set-logic" => {
Ok(())
}
"declare-fun" => {
if items.len() < 4 {
return Err(ParseError::InvalidSyntax(
"declare-fun requires name, arg sorts, and return sort".to_string(),
));
}
let name = items[1].as_symbol().ok_or_else(|| {
ParseError::InvalidSyntax("expected predicate name".to_string())
})?;
let arg_sorts_list = items[2].as_list().ok_or_else(|| {
ParseError::InvalidSyntax("expected argument sort list".to_string())
})?;
let arg_sorts: Vec<SortId> = arg_sorts_list
.iter()
.map(|s| {
let sort_name = s.as_symbol().ok_or_else(|| {
ParseError::InvalidSyntax("expected sort name".to_string())
})?;
Ok(self.parse_sort(sort_name))
})
.collect::<Result<Vec<_>, ParseError>>()?;
let pred_id = self.system.declare_predicate(name, arg_sorts);
self.predicates.insert(name.to_string(), pred_id);
Ok(())
}
"assert" => {
if items.len() < 2 {
return Err(ParseError::InvalidSyntax(
"assert requires a formula".to_string(),
));
}
let formula = self.parse_term(&items[1])?;
self.process_assertion(formula)?;
Ok(())
}
"check-sat" => {
Ok(())
}
_ => {
Ok(())
}
}
}
fn parse_sort(&self, name: &str) -> SortId {
match name {
"Bool" => self.terms.sorts.bool_sort,
"Int" => self.terms.sorts.int_sort,
"Real" => self.terms.sorts.real_sort,
_ => {
self.terms.sorts.bool_sort
}
}
}
fn parse_term(&mut self, expr: &SExpr) -> Result<TermId, ParseError> {
match expr {
SExpr::Atom(token) => self.parse_atom(token),
SExpr::List(items) => self.parse_application(items),
}
}
fn parse_atom(&mut self, token: &Token) -> Result<TermId, ParseError> {
match token {
Token::Symbol(s) => {
match s.as_str() {
"true" => Ok(self.terms.mk_true()),
"false" => Ok(self.terms.mk_false()),
_ => {
if let Some(&var) = self.variables.get(s) {
Ok(var)
} else {
let var = self.terms.mk_var(s, self.terms.sorts.int_sort);
self.variables.insert(s.clone(), var);
Ok(var)
}
}
}
}
Token::Numeral(n) => {
let value = n
.parse::<i64>()
.map_err(|_| ParseError::TypeError(format!("invalid integer: {}", n)))?;
Ok(self.terms.mk_int(value))
}
Token::Decimal(d) => {
let parts: Vec<&str> = d.split('.').collect();
if parts.len() != 2 {
return Err(ParseError::TypeError(format!("invalid decimal: {}", d)));
}
let _whole: i64 = parts[0].parse().map_err(|_| {
ParseError::TypeError(format!("invalid decimal whole part: {}", parts[0]))
})?;
let _frac = parts[1];
Ok(self.terms.mk_int(0))
}
_ => Err(ParseError::Unsupported(format!(
"unsupported token type: {:?}",
token
))),
}
}
fn parse_application(&mut self, items: &[SExpr]) -> Result<TermId, ParseError> {
if items.is_empty() {
return Err(ParseError::InvalidSyntax("empty application".to_string()));
}
let func_name = items[0]
.as_symbol()
.ok_or_else(|| ParseError::InvalidSyntax("expected function name".to_string()))?;
match func_name {
"and" => {
let args: Vec<TermId> = items[1..]
.iter()
.map(|e| self.parse_term(e))
.collect::<Result<Vec<_>, _>>()?;
Ok(self.terms.mk_and(args))
}
"or" => {
let args: Vec<TermId> = items[1..]
.iter()
.map(|e| self.parse_term(e))
.collect::<Result<Vec<_>, _>>()?;
Ok(self.terms.mk_or(args))
}
"not" => {
if items.len() != 2 {
return Err(ParseError::InvalidSyntax(
"not requires 1 argument".to_string(),
));
}
let arg = self.parse_term(&items[1])?;
Ok(self.terms.mk_not(arg))
}
"=>" | "implies" => {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
"implies requires 2 arguments".to_string(),
));
}
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_implies(lhs, rhs))
}
"=" => {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
"= requires 2 arguments".to_string(),
));
}
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_eq(lhs, rhs))
}
"<" => {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
"< requires 2 arguments".to_string(),
));
}
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_lt(lhs, rhs))
}
"<=" => {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
"<= requires 2 arguments".to_string(),
));
}
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_le(lhs, rhs))
}
">" => {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
"> requires 2 arguments".to_string(),
));
}
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_gt(lhs, rhs))
}
">=" => {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
">= requires 2 arguments".to_string(),
));
}
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_ge(lhs, rhs))
}
"+" => {
let args: Vec<TermId> = items[1..]
.iter()
.map(|e| self.parse_term(e))
.collect::<Result<Vec<_>, _>>()?;
Ok(self.terms.mk_add(args))
}
"-" => {
if items.len() == 2 {
let arg = self.parse_term(&items[1])?;
let zero = self.terms.mk_int(0);
Ok(self.terms.mk_sub(zero, arg))
} else if items.len() == 3 {
let lhs = self.parse_term(&items[1])?;
let rhs = self.parse_term(&items[2])?;
Ok(self.terms.mk_sub(lhs, rhs))
} else {
Err(ParseError::InvalidSyntax(
"- requires 1 or 2 arguments".to_string(),
))
}
}
"*" => {
let args: Vec<TermId> = items[1..]
.iter()
.map(|e| self.parse_term(e))
.collect::<Result<Vec<_>, _>>()?;
Ok(self.terms.mk_mul(args))
}
"forall" => self.parse_quantifier(items, true),
"exists" => self.parse_quantifier(items, false),
_ => {
if let Some(&_pred_id) = self.predicates.get(func_name) {
let _args: Vec<TermId> = items[1..]
.iter()
.map(|e| self.parse_term(e))
.collect::<Result<Vec<_>, _>>()?;
Ok(self.terms.mk_true())
} else {
Err(ParseError::UndefinedSymbol(func_name.to_string()))
}
}
}
}
fn parse_quantifier(&mut self, items: &[SExpr], is_forall: bool) -> Result<TermId, ParseError> {
if items.len() != 3 {
return Err(ParseError::InvalidSyntax(
"quantifier requires variable list and body".to_string(),
));
}
let var_list = items[1].as_list().ok_or_else(|| {
ParseError::InvalidSyntax("expected variable declaration list".to_string())
})?;
let mut quantified_vars = Vec::new();
let old_vars = self.variables.clone();
for var_decl in var_list {
let decl_items = var_decl.as_list().ok_or_else(|| {
ParseError::InvalidSyntax("expected variable declaration".to_string())
})?;
if decl_items.len() != 2 {
return Err(ParseError::InvalidSyntax(
"variable declaration must be (name sort)".to_string(),
));
}
let var_name = decl_items[0]
.as_symbol()
.ok_or_else(|| ParseError::InvalidSyntax("expected variable name".to_string()))?;
let sort_name = decl_items[1]
.as_symbol()
.ok_or_else(|| ParseError::InvalidSyntax("expected sort name".to_string()))?;
let sort = self.parse_sort(sort_name);
let var = self.terms.mk_var(var_name, sort);
self.variables.insert(var_name.to_string(), var);
quantified_vars.push((var_name, sort));
}
let body = self.parse_term(&items[2])?;
self.variables = old_vars;
if is_forall {
Ok(self.terms.mk_forall(quantified_vars, body))
} else {
Ok(self.terms.mk_exists(quantified_vars, body))
}
}
fn process_assertion(&mut self, _formula: TermId) -> Result<(), ParseError> {
Ok(())
}
#[allow(dead_code)]
fn parse_simple_rule(&mut self, _rule_text: &str) -> Result<(), ParseError> {
Err(ParseError::Unsupported(
"Simple rule parser not yet implemented".to_string(),
))
}
pub fn system(self) -> ChcSystem {
self.system
}
pub fn declare_predicate(
&mut self,
name: &str,
arg_sorts: impl IntoIterator<Item = oxiz_core::SortId>,
) -> PredId {
let id = self.system.declare_predicate(name, arg_sorts);
self.predicates.insert(name.to_string(), id);
id
}
#[allow(clippy::too_many_arguments)]
pub fn add_init_rule(
&mut self,
vars: impl IntoIterator<Item = (String, oxiz_core::SortId)>,
constraint: TermId,
head_pred: PredId,
head_args: impl IntoIterator<Item = TermId>,
) {
self.system
.add_init_rule(vars, constraint, head_pred, head_args);
}
#[allow(clippy::too_many_arguments)]
pub fn add_transition_rule(
&mut self,
vars: impl IntoIterator<Item = (String, oxiz_core::SortId)>,
body_preds: impl IntoIterator<Item = PredicateApp>,
constraint: TermId,
head_pred: PredId,
head_args: impl IntoIterator<Item = TermId>,
) {
self.system
.add_transition_rule(vars, body_preds, constraint, head_pred, head_args);
}
pub fn add_query(
&mut self,
vars: impl IntoIterator<Item = (String, oxiz_core::SortId)>,
body_preds: impl IntoIterator<Item = PredicateApp>,
constraint: TermId,
) {
self.system.add_query(vars, body_preds, constraint);
}
}
/// Convenience wrapper that builds a `ChcSystem` from CHC-COMP style input.
pub struct ChcCompBuilder<'a> {
// Underlying parser holding the system under construction.
parser: ChcParser<'a>,
}
impl<'a> ChcCompBuilder<'a> {
    /// Creates a builder that allocates terms in `terms`.
    pub fn new(terms: &'a mut TermManager) -> Self {
        Self {
            parser: ChcParser::new(terms),
        }
    }

    /// Reads and parses a CHC file from `path`.
    ///
    /// # Errors
    /// Returns `ParseError::Io` if the file cannot be read (fixed: this was
    /// previously misclassified as `Unsupported`), or any parse error.
    pub fn from_file(&mut self, path: &str) -> Result<(), ParseError> {
        let contents = std::fs::read_to_string(path)?;
        self.from_str(&contents)
    }

    /// Parses CHC input from a string into the builder's system.
    pub fn from_str(&mut self, input: &str) -> Result<(), ParseError> {
        // `ChcParser::parse` moves the system out of the parser via
        // `mem::take`. Previously the returned system was discarded, so
        // `build()` always yielded an empty system; store it back so the
        // parsed contents survive.
        let system = self.parser.parse(input)?;
        self.parser.system = system;
        Ok(())
    }

    /// Consumes the builder and returns the constructed system.
    pub fn build(self) -> ChcSystem {
        self.parser.system()
    }
}
#[cfg(test)]
mod tests {
use super::*;
// A fresh parser starts with an empty system.
#[test]
fn test_parser_creation() {
let mut terms = TermManager::new();
let parser = ChcParser::new(&mut terms);
let system = parser.system();
assert!(system.is_empty());
}
// Building a system via the programmatic API (no SMT-LIB text).
#[test]
fn test_programmatic_construction() {
let mut terms = TermManager::new();
let int_sort = terms.sorts.int_sort;
let x = terms.mk_var("x", int_sort);
let zero = terms.mk_int(0);
let constraint = terms.mk_eq(x, zero);
let mut parser = ChcParser::new(&mut terms);
let inv = parser.declare_predicate("Inv", [int_sort]);
parser.add_init_rule([("x".to_string(), int_sort)], constraint, inv, [x]);
let system = parser.system();
assert_eq!(system.num_predicates(), 1);
assert_eq!(system.num_rules(), 1);
}
// `set-logic` is accepted (and ignored) by the command dispatcher.
#[test]
fn test_full_parse_basic() {
let mut terms = TermManager::new();
let mut parser = ChcParser::new(&mut terms);
let result = parser.parse("(set-logic HORN)");
assert!(result.is_ok());
}
// `declare-fun` registers a predicate in the resulting system.
#[test]
fn test_parse_predicate_declaration() {
let mut terms = TermManager::new();
let mut parser = ChcParser::new(&mut terms);
let input = "(declare-fun P (Int Bool) Bool)";
let result = parser.parse(input);
assert!(result.is_ok());
let system = result.expect("test operation should succeed");
assert_eq!(system.num_predicates(), 1);
}
// Numeral atoms and n-ary `+` applications parse into terms.
#[test]
fn test_parse_arithmetic() {
let mut terms = TermManager::new();
let mut parser = ChcParser::new(&mut terms);
let result = parser.parse_atom(&Token::Numeral("42".to_string()));
assert!(result.is_ok());
let expr = SExpr::List(vec![
SExpr::Atom(Token::Symbol("+".to_string())),
SExpr::Atom(Token::Numeral("1".to_string())),
SExpr::Atom(Token::Numeral("2".to_string())),
]);
let result = parser.parse_term(&expr);
assert!(result.is_ok());
}
// Parens and symbols tokenize in order.
#[test]
fn test_lexer_basic() {
let mut lexer = Lexer::new("(set-logic HORN)");
let tokens = lexer.tokenize().expect("test operation should succeed");
assert_eq!(tokens.len(), 4);
assert_eq!(tokens[0], Token::LParen);
assert_eq!(tokens[1], Token::Symbol("set-logic".to_string()));
assert_eq!(tokens[2], Token::Symbol("HORN".to_string()));
assert_eq!(tokens[3], Token::RParen);
}
// `:kw` tokens drop the leading colon.
#[test]
fn test_lexer_keywords() {
let mut lexer = Lexer::new(":name :type");
let tokens = lexer.tokenize().expect("test operation should succeed");
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], Token::Keyword("name".to_string()));
assert_eq!(tokens[1], Token::Keyword("type".to_string()));
}
// Integers become Numeral, dotted values become Decimal.
#[test]
fn test_lexer_numbers() {
let mut lexer = Lexer::new("42 3.14");
let tokens = lexer.tokenize().expect("test operation should succeed");
assert_eq!(tokens.len(), 2);
assert_eq!(tokens[0], Token::Numeral("42".to_string()));
assert_eq!(tokens[1], Token::Decimal("3.14".to_string()));
}
// String literals lose their surrounding quotes.
#[test]
fn test_lexer_string() {
let mut lexer = Lexer::new(r#""hello world""#);
let tokens = lexer.tokenize().expect("test operation should succeed");
assert_eq!(tokens.len(), 1);
assert_eq!(tokens[0], Token::StringLit("hello world".to_string()));
}
// `;` comments are skipped up to the end of the line.
#[test]
fn test_lexer_comments() {
let mut lexer = Lexer::new("; this is a comment\n(foo bar)");
let tokens = lexer.tokenize().expect("test operation should succeed");
assert_eq!(tokens.len(), 4);
assert_eq!(tokens[0], Token::LParen);
assert_eq!(tokens[1], Token::Symbol("foo".to_string()));
assert_eq!(tokens[2], Token::Symbol("bar".to_string()));
assert_eq!(tokens[3], Token::RParen);
}
// A bare symbol parses as a single atom.
#[test]
fn test_sexpr_parser_atom() {
let exprs = SExprParser::parse_str("foo").expect("test operation should succeed");
assert_eq!(exprs.len(), 1);
assert!(exprs[0].is_atom());
assert_eq!(exprs[0].as_symbol(), Some("foo"));
}
// A flat list parses with its elements in order.
#[test]
fn test_sexpr_parser_list() {
let exprs = SExprParser::parse_str("(foo bar)").expect("test operation should succeed");
assert_eq!(exprs.len(), 1);
assert!(exprs[0].is_list());
let list = exprs[0].as_list().expect("test operation should succeed");
assert_eq!(list.len(), 2);
assert_eq!(list[0].as_symbol(), Some("foo"));
assert_eq!(list[1].as_symbol(), Some("bar"));
}
// Nested lists are parsed recursively.
#[test]
fn test_sexpr_parser_nested() {
let exprs =
SExprParser::parse_str("(foo (bar baz) qux)").expect("test operation should succeed");
assert_eq!(exprs.len(), 1);
let list = exprs[0].as_list().expect("test operation should succeed");
assert_eq!(list.len(), 3);
assert_eq!(list[0].as_symbol(), Some("foo"));
assert!(list[1].is_list());
assert_eq!(list[2].as_symbol(), Some("qux"));
let inner = list[1].as_list().expect("test operation should succeed");
assert_eq!(inner.len(), 2);
assert_eq!(inner[0].as_symbol(), Some("bar"));
assert_eq!(inner[1].as_symbol(), Some("baz"));
}
// Several top-level forms come back as separate expressions.
#[test]
fn test_sexpr_parser_multiple() {
let exprs = SExprParser::parse_str("(foo) (bar)").expect("test operation should succeed");
assert_eq!(exprs.len(), 2);
assert!(exprs[0].is_list());
assert!(exprs[1].is_list());
}
// Missing `)` is a parse error, not a panic.
#[test]
fn test_sexpr_parser_error_unclosed() {
let result = SExprParser::parse_str("(foo bar");
assert!(result.is_err());
}
// A stray `)` is also reported as an error.
#[test]
fn test_sexpr_parser_error_unexpected_close() {
let result = SExprParser::parse_str("foo)");
assert!(result.is_err());
}
}