use super::types::*;
use super::combinator::*;
use super::combinator::helpers::*;
use super::primitive::SchemePreimitives;
use crate::diagnostics::Span;
use std::collections::VecDeque;
/// A parsed Scheme S-expression.
///
/// The lifetime `'a` is the lifetime of the parsed input; atoms that hold
/// text (symbols, strings) borrow from it.
#[derive(Debug, Clone, PartialEq)]
pub enum SchemeSexp<'a> {
/// A single atomic datum (number, symbol, string, character, boolean, ...).
Atom(SchemeAtom<'a>),
/// A parenthesised sequence of expressions; `()` is the empty list.
List(Vec<SchemeSexp<'a>>),
/// A pair written as `(first . second)`.
DottedPair(Box<SchemeSexp<'a>>, Box<SchemeSexp<'a>>),
/// An expression prefixed with `'`.
Quote(Box<SchemeSexp<'a>>),
/// An expression prefixed with a backtick.
Quasiquote(Box<SchemeSexp<'a>>),
/// An expression prefixed with `,`.
Unquote(Box<SchemeSexp<'a>>),
/// An expression prefixed with `,@`.
UnquoteSplicing(Box<SchemeSexp<'a>>),
}
/// An atomic (non-compound) Scheme datum.
///
/// Text-carrying variants borrow from the parsed input for lifetime `'a`.
#[derive(Debug, Clone, PartialEq)]
pub enum SchemeAtom<'a> {
/// An identifier, stored as a slice of the input.
Symbol(&'a str),
/// A signed integer literal.
Integer(i64),
/// A floating-point literal (a number containing a decimal point).
Float(f64),
/// A pair of `i64` values.
/// NOTE(review): presumably numerator/denominator; no parser in this file
/// produces this variant, so confirm against other users.
Rational(i64, i64),
/// A pair of `f64` values.
/// NOTE(review): presumably real/imaginary parts; no parser in this file
/// produces this variant, so confirm against other users.
Complex(f64, f64),
/// A string literal, stored as a slice of the input (so escape sequences
/// are kept as written rather than decoded).
String(&'a str),
/// A character literal such as `#\a` or `#\newline`.
Character(char),
/// `#t` or `#f`.
Boolean(bool),
/// The nil value. No parser in this file produces this variant.
Nil,
}
/// Namespace for the Scheme parser constructors.
///
/// Every associated function builds a fresh, stateless parser value; none of
/// them consumes any input by itself.
pub struct SchemeParser;
impl SchemeParser {
/// Parser for one complete S-expression: it skips leading whitespace and
/// comments, then accepts a quote-family prefix form, a parenthesised list
/// or dotted pair, or an atom.
pub fn s_expression<'a>() -> impl ParserCombinator<'a, SchemeSexp<'a>> {
SExpressionParser::new()
}
/// Parser for a single atom. Alternatives are tried in the order number,
/// boolean, character, string, symbol.
pub fn atom<'a>() -> impl ParserCombinator<'a, SchemeAtom<'a>> {
AtomParser::new()
}
/// Parser for a parenthesised list `( expr* )`, yielding its elements.
/// The input must start with `(`; dotted tails are not handled here.
pub fn list<'a>() -> impl ParserCombinator<'a, Vec<SchemeSexp<'a>>> {
ListParser::new()
}
/// Parser for `( first . second )`, yielding both halves as a tuple.
pub fn dotted_pair<'a>() -> impl ParserCombinator<'a, (SchemeSexp<'a>, SchemeSexp<'a>)> {
DottedPairParser::new()
}
/// Parser for a symbol, yielding the matched slice of the input. Which
/// characters are allowed is decided by `SchemePreimitives`.
pub fn symbol<'a>() -> impl ParserCombinator<'a, &'a str> {
SymbolParser::new()
}
/// Parser for a numeric literal: an optional `+`/`-` sign, digits, and an
/// optional `.` with further digits. Yields `SchemeAtom::Float` when a
/// decimal point is present and `SchemeAtom::Integer` otherwise.
pub fn number<'a>() -> impl ParserCombinator<'a, SchemeAtom<'a>> {
NumberParser::new()
}
/// Parser for a double-quoted string literal, yielding a slice borrowed
/// from the input.
pub fn string<'a>() -> impl ParserCombinator<'a, &'a str> {
StringParser::new()
}
/// Parser for a character literal introduced by `#\`. The names `newline`,
/// `space`, `tab` and `return` are recognised; otherwise the next character
/// is taken literally.
pub fn character<'a>() -> impl ParserCombinator<'a, char> {
CharacterParser::new()
}
/// Parser for the boolean literals `#t` and `#f`.
pub fn boolean<'a>() -> impl ParserCombinator<'a, bool> {
BooleanParser::new()
}
/// Parser for a single `;` line comment, built from `char(';')`, a run of
/// non-newline characters, and an optional `\n`.
pub fn skip_comments<'a>() -> impl ParserCombinator<'a, ()> {
CommentSkipper::new()
}
/// Parser that repeatedly applies "whitespace or line comment" via `many()`.
pub fn skip_whitespace_and_comments<'a>() -> impl ParserCombinator<'a, ()> {
WhitespaceSkipper::new()
}
}
/// Recursive parser for a single S-expression.
struct SExpressionParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> SExpressionParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }

    /// Parses the expression following a quote-family prefix and wraps it
    /// with `wrap` (one of the boxed `SchemeSexp` constructors).
    fn parse_prefixed(
        after_prefix: Input<'a>,
        wrap: fn(Box<SchemeSexp<'a>>) -> SchemeSexp<'a>,
    ) -> ParseResult<'a, SchemeSexp<'a>> {
        let (rest, inner) = Self::new().parse(after_prefix)?;
        Ok((rest, wrap(Box::new(inner))))
    }

    /// Parses everything after an already-consumed `(`: the empty list, a
    /// dotted pair `(a . b)`, or a proper list of one or more elements.
    fn parse_after_open_paren(after_open: Input<'a>) -> ParseResult<'a, SchemeSexp<'a>> {
        let skip_trivia = SchemeParser::skip_whitespace_and_comments();

        let (cursor, _) = skip_trivia.parse(after_open)?;
        if let Ok((rest, _)) = char(')').parse(cursor) {
            return Ok((rest, SchemeSexp::List(Vec::new())));
        }

        let (cursor, head) = Self::new().parse(cursor)?;
        let (cursor, _) = skip_trivia.parse(cursor)?;

        // A `.` directly after the first element marks a dotted pair.
        if let Ok((after_dot, _)) = char('.').parse(cursor) {
            let (cursor, _) = skip_trivia.parse(after_dot)?;
            let (cursor, tail) = Self::new().parse(cursor)?;
            let (cursor, _) = skip_trivia.parse(cursor)?;
            let (rest, _) = char(')').parse(cursor)?;
            return Ok((rest, SchemeSexp::DottedPair(Box::new(head), Box::new(tail))));
        }

        // Otherwise keep collecting elements until the closing parenthesis.
        let mut items = vec![head];
        let mut cursor = cursor;
        loop {
            if let Ok((rest, _)) = char(')').parse(cursor) {
                return Ok((rest, SchemeSexp::List(items)));
            }
            let (after_item, item) = Self::new().parse(cursor)?;
            items.push(item);
            let (after_trivia, _) = skip_trivia.parse(after_item)?;
            cursor = after_trivia;
        }
    }
}

impl<'a> ParserCombinator<'a, SchemeSexp<'a>> for SExpressionParser<'a> {
    /// Skips leading whitespace/comments, then dispatches on the first
    /// token: `'`, a backtick, `,@`, `,`, `(`, or (as the fallback) an atom.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, SchemeSexp<'a>> {
        let (input, _) = SchemeParser::skip_whitespace_and_comments().parse(input)?;

        if let Ok((rest, _)) = char('\'').parse(input) {
            return Self::parse_prefixed(rest, SchemeSexp::Quote);
        }
        if let Ok((rest, _)) = char('`').parse(input) {
            return Self::parse_prefixed(rest, SchemeSexp::Quasiquote);
        }
        // `,@` must be tried before `,`, which is a prefix of it.
        if let Ok((rest, _)) = tag(",@").parse(input) {
            return Self::parse_prefixed(rest, SchemeSexp::UnquoteSplicing);
        }
        if let Ok((rest, _)) = char(',').parse(input) {
            return Self::parse_prefixed(rest, SchemeSexp::Unquote);
        }
        if let Ok((rest, _)) = char('(').parse(input) {
            return Self::parse_after_open_paren(rest);
        }

        let (rest, atom) = SchemeParser::atom().parse(input)?;
        Ok((rest, SchemeSexp::Atom(atom)))
    }
}
/// Parser for a single atom.
struct AtomParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> AtomParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, SchemeAtom<'a>> for AtomParser<'a> {
    /// Tries each atom kind against the same `input`, in the order number,
    /// boolean, character, string, symbol, and returns the first success.
    /// Each alternative is only attempted once the previous one has failed.
    /// If none matches, a generic "Expected atom" error is returned.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, SchemeAtom<'a>> {
        SchemeParser::number()
            .parse(input)
            .or_else(|_| {
                SchemeParser::boolean()
                    .parse(input)
                    .map(|(rest, value)| (rest, SchemeAtom::Boolean(value)))
            })
            .or_else(|_| {
                SchemeParser::character()
                    .parse(input)
                    .map(|(rest, ch)| (rest, SchemeAtom::Character(ch)))
            })
            .or_else(|_| {
                SchemeParser::string()
                    .parse(input)
                    .map(|(rest, text)| (rest, SchemeAtom::String(text)))
            })
            .or_else(|_| {
                SchemeParser::symbol()
                    .parse(input)
                    .map(|(rest, name)| (rest, SchemeAtom::Symbol(name)))
            })
            .or_else(|_| {
                Err(Box::new(ParseError::new(
                    "Expected atom (number, symbol, string, character, or boolean)".to_string(),
                    Span::new(0, 1),
                )))
            })
    }
}
/// Parser for a parenthesised list of S-expressions.
struct ListParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> ListParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, Vec<SchemeSexp<'a>>> for ListParser<'a> {
    /// Expects `(` at the very start of `input`, then collects expressions
    /// until a `)` is found. Whitespace and comments between elements are
    /// skipped. Returns the elements in source order.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, Vec<SchemeSexp<'a>>> {
        let skip_trivia = SchemeParser::skip_whitespace_and_comments();

        let (after_open, _) = char('(').parse(input)?;
        let (mut cursor, _) = skip_trivia.parse(after_open)?;
        let mut items = Vec::new();

        loop {
            match char(')').parse(cursor) {
                Ok((rest, _)) => break Ok((rest, items)),
                Err(_) => {
                    let (after_item, item) = SchemeParser::s_expression().parse(cursor)?;
                    items.push(item);
                    let (after_trivia, _) = skip_trivia.parse(after_item)?;
                    cursor = after_trivia;
                }
            }
        }
    }
}
/// Parser for a dotted pair `(first . second)`.
struct DottedPairParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> DottedPairParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, (SchemeSexp<'a>, SchemeSexp<'a>)> for DottedPairParser<'a> {
    /// Parses `(`, an expression, `.`, a second expression and `)`, skipping
    /// whitespace and comments between the tokens. Fails at the first token
    /// that does not match.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, (SchemeSexp<'a>, SchemeSexp<'a>)> {
        let skip_trivia = SchemeParser::skip_whitespace_and_comments();

        let (cursor, _) = char('(').parse(input)?;
        let (cursor, _) = skip_trivia.parse(cursor)?;
        let (cursor, head) = SchemeParser::s_expression().parse(cursor)?;
        let (cursor, _) = skip_trivia.parse(cursor)?;

        let (cursor, _) = char('.').parse(cursor)?;
        let (cursor, _) = skip_trivia.parse(cursor)?;
        let (cursor, tail) = SchemeParser::s_expression().parse(cursor)?;
        let (cursor, _) = skip_trivia.parse(cursor)?;

        let (rest, _) = char(')').parse(cursor)?;
        Ok((rest, (head, tail)))
    }
}
/// Parser for a symbol (identifier).
struct SymbolParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> SymbolParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, &'a str> for SymbolParser<'a> {
    /// Matches one `identifier_start` character followed by any number of
    /// `identifier_continue` characters (both predicates come from
    /// `SchemePreimitives`) and returns the matched prefix of `input`.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, &'a str> {
        let (after_head, _) =
            FnParser::new(SchemePreimitives::identifier_start()).parse(input)?;
        let (rest, _) = FnParser::new(SchemePreimitives::identifier_continue())
            .many()
            .parse(after_head)?;

        // The symbol is whatever was consumed between `input` and `rest`.
        let matched_len = input.len() - rest.len();
        Ok((rest, &input[..matched_len]))
    }
}
/// Parser for integer and floating-point literals.
struct NumberParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> NumberParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, SchemeAtom<'a>> for NumberParser<'a> {
    /// Recognises an optional `+`/`-` sign, at least one digit, and an
    /// optional `.` followed by zero or more digits. The consumed text is
    /// then converted with `str::parse`: to `f64` (yielding
    /// `SchemeAtom::Float`) when a decimal point was seen, otherwise to
    /// `i64` (yielding `SchemeAtom::Integer`). A conversion failure, such
    /// as an out-of-range integer, is reported as a parse error.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, SchemeAtom<'a>> {
        let (after_sign, _sign) = char('+').or(char('-')).optional().parse(input)?;
        let (after_int, _int_digits) = digit().many1().parse(after_sign)?;

        match char('.').parse(after_int) {
            Ok((after_dot, _)) => {
                let (rest, _frac_digits) = digit().many().parse(after_dot)?;
                let consumed = input.len() - rest.len();
                let text = &input[..consumed];
                let value: f64 = text.parse().map_err(|_| {
                    ParseError::new(
                        "Invalid floating point number".to_string(),
                        Span::new(0, consumed),
                    )
                })?;
                Ok((rest, SchemeAtom::Float(value)))
            }
            Err(_) => {
                let consumed = input.len() - after_int.len();
                let text = &input[..consumed];
                let value: i64 = text.parse().map_err(|_| {
                    ParseError::new("Invalid integer".to_string(), Span::new(0, consumed))
                })?;
                Ok((after_int, SchemeAtom::Integer(value)))
            }
        }
    }
}
/// Parser for a double-quoted string literal.
struct StringParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> StringParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, &'a str> for StringParser<'a> {
    /// Parses `"` ... `"` and returns the text between the quotes as a slice
    /// of `input`.
    ///
    /// A backslash consumes the following character as well, so `\"` does not
    /// end the literal. Escape sequences are not decoded: the returned slice
    /// contains them exactly as written.
    ///
    /// # Errors
    ///
    /// Fails if `input` does not start with `"`, if the input ends right
    /// after a backslash, or if no closing quote is found.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, &'a str> {
        let (body, _) = char('"').parse(input)?;
        // `cursor` always points at the next unread character inside the
        // literal; `char_count` counts the characters consumed so far and is
        // only used for error spans.
        let mut cursor = body;
        let mut char_count = 0;

        loop {
            let (after, ch) = match any_char().parse(cursor) {
                Ok(step) => step,
                Err(_) => {
                    return Err(Box::new(ParseError::new(
                        "Unterminated string literal".to_string(),
                        Span::new(0, char_count),
                    )));
                }
            };

            match ch {
                '"' => {
                    // The content runs from just after the opening quote up
                    // to (not including) the closing quote at `cursor`.
                    let content_len = body.len() - cursor.len();
                    return Ok((after, &body[..content_len]));
                }
                '\\' => match any_char().parse(after) {
                    Ok((after_escape, _)) => {
                        cursor = after_escape;
                        char_count += 2;
                    }
                    Err(_) => {
                        return Err(Box::new(ParseError::new(
                            "Unexpected end of input in string escape".to_string(),
                            Span::new(0, char_count + 1),
                        )));
                    }
                },
                _ => {
                    cursor = after;
                    char_count += 1;
                }
            }
        }
    }
}
/// Parser for character literals such as `#\a` or `#\newline`.
struct CharacterParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> CharacterParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, char> for CharacterParser<'a> {
    /// Requires the `#\` prefix, then tries the named characters `newline`,
    /// `space`, `tab` and `return` in that order. If none of the names
    /// matches, the next character of the input is returned literally.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, char> {
        // Named characters and the values they denote, in match order.
        const NAMED: [(&str, char); 4] = [
            ("newline", '\n'),
            ("space", ' '),
            ("tab", '\t'),
            ("return", '\r'),
        ];

        let (after_prefix, _) = tag("#\\").parse(input)?;

        for &(name, value) in NAMED.iter() {
            if let Ok((rest, _)) = tag(name).parse(after_prefix) {
                return Ok((rest, value));
            }
        }

        any_char().parse(after_prefix)
    }
}
/// Parser for the boolean literals `#t` and `#f`.
struct BooleanParser<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> BooleanParser<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, bool> for BooleanParser<'a> {
    /// Maps `#t` to `true` and `#f` to `false`, trying `#t` first.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, bool> {
        tag("#t")
            .map(|_| true)
            .or(tag("#f").map(|_| false))
            .parse(input)
    }
}
/// Parser that consumes a single `;` line comment.
#[derive(Clone)]
struct CommentSkipper<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> CommentSkipper<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, ()> for CommentSkipper<'a> {
    /// Sequences three parsers: a `;`, a run of characters other than `\n`,
    /// and an optional terminating `\n`. The matched text is discarded.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, ()> {
        let comment_body = satisfy(|ch| ch != '\n').many();
        let line_end = char('\n').optional();

        char(';')
            .and(comment_body)
            .and(line_end)
            .map(|_| ())
            .parse(input)
    }
}
/// Parser that skips any mix of whitespace and `;` line comments.
struct WhitespaceSkipper<'a> {
    phantom: std::marker::PhantomData<&'a ()>,
}

impl<'a> WhitespaceSkipper<'a> {
    fn new() -> Self {
        Self { phantom: std::marker::PhantomData }
    }
}

impl<'a> ParserCombinator<'a, ()> for WhitespaceSkipper<'a> {
    /// Applies "whitespace or comment" repeatedly via `many()` and discards
    /// whatever was matched, returning only the remaining input.
    fn parse(&self, input: Input<'a>) -> ParseResult<'a, ()> {
        whitespace()
            .map(|_| ())
            .or(CommentSkipper::new())
            .many()
            .parse(input)
            .map(|(rest, _)| (rest, ()))
    }
}
/// Batch-oriented entry points built on top of the basic parsers.
pub mod optimized {
    use super::*;

    /// Parser front end for reading a whole sequence of S-expressions.
    pub struct OptimizedSExpressionParser<'a> {
        // NOTE(review): no method in this module reads or writes the cache;
        // it looks like a placeholder for memoisation. Confirm before relying
        // on it.
        memo_cache: std::collections::HashMap<usize, ParseResult<'a, SchemeSexp<'a>>>,
        phantom: std::marker::PhantomData<&'a ()>,
    }

    impl<'a> Default for OptimizedSExpressionParser<'a> {
        fn default() -> Self {
            Self::new()
        }
    }

    impl<'a> OptimizedSExpressionParser<'a> {
        /// Creates a parser with an empty memo cache.
        pub fn new() -> Self {
            Self {
                memo_cache: std::collections::HashMap::new(),
                phantom: std::marker::PhantomData,
            }
        }

        /// Parses every S-expression in `input`, in order.
        ///
        /// Whitespace and comments between and after the expressions are
        /// consumed, so on success the returned remainder is empty; trailing
        /// trivia (for example a final newline) is not handed back to the
        /// caller.
        ///
        /// # Errors
        ///
        /// Returns the first error produced by the whitespace skipper or the
        /// S-expression parser; expressions parsed before it are discarded.
        pub fn parse_batch(&mut self, input: Input<'a>) -> ParseResult<'a, Vec<SchemeSexp<'a>>> {
            let skip_trivia = SchemeParser::skip_whitespace_and_comments();
            let mut expressions = Vec::new();
            let mut remaining = input;

            while !remaining.is_empty() {
                // Record the skip before testing for end of input, so that
                // trailing whitespace/comments do not leak into the remainder.
                let (after_trivia, _) = skip_trivia.parse(remaining)?;
                remaining = after_trivia;
                if remaining.is_empty() {
                    break;
                }

                let (after_expr, expr) = SchemeParser::s_expression().parse(remaining)?;
                expressions.push(expr);
                remaining = after_expr;
            }

            Ok((remaining, expressions))
        }

        /// Placeholder: currently does nothing with `_input`.
        pub fn simd_optimized_symbol_parsing(&self, _input: Input<'a>) {}
    }
}
// Unit tests for the public `SchemeParser` constructors and the batch parser.
#[cfg(test)]
#[allow(unused_imports, dead_code)]
mod tests {
    use super::*;

    #[test]
    fn test_symbol_parsing() {
        let parser = SchemeParser::symbol();

        let result = parser.parse("hello-world");
        assert!(result.is_ok());
        let (remaining, symbol) = result.unwrap();
        assert_eq!(symbol, "hello-world");
        assert_eq!(remaining, "");

        // The symbol parser on its own accepts a leading sign character.
        let result = parser.parse("+123");
        assert!(result.is_ok());
        let (remaining, symbol) = result.unwrap();
        assert_eq!(symbol, "+123");
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_number_parsing() {
        let parser = SchemeParser::number();

        let result = parser.parse("42");
        assert!(result.is_ok());
        let (remaining, atom) = result.unwrap();
        assert_eq!(atom, SchemeAtom::Integer(42));
        assert_eq!(remaining, "");

        let result = parser.parse("3.14");
        assert!(result.is_ok());
        let (remaining, atom) = result.unwrap();
        assert_eq!(atom, SchemeAtom::Float(3.14));
        assert_eq!(remaining, "");

        let result = parser.parse("-123");
        assert!(result.is_ok());
        let (remaining, atom) = result.unwrap();
        assert_eq!(atom, SchemeAtom::Integer(-123));
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_boolean_parsing() {
        let parser = SchemeParser::boolean();

        let result = parser.parse("#t");
        assert!(result.is_ok());
        let (remaining, boolean) = result.unwrap();
        assert!(boolean);
        assert_eq!(remaining, "");

        let result = parser.parse("#f");
        assert!(result.is_ok());
        let (remaining, boolean) = result.unwrap();
        assert!(!boolean);
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_character_parsing() {
        let parser = SchemeParser::character();

        let result = parser.parse("#\\a");
        assert!(result.is_ok());
        let (remaining, character) = result.unwrap();
        assert_eq!(character, 'a');
        assert_eq!(remaining, "");

        // `#\newline` is a named character: the whole name is consumed and
        // the result is the newline character, not the letter `n`.
        let result = parser.parse("#\\newline");
        assert!(result.is_ok());
        let (remaining, character) = result.unwrap();
        assert_eq!(character, '\n');
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_string_parsing() {
        let parser = SchemeParser::string();

        let result = parser.parse("\"hello world\"");
        assert!(result.is_ok());
        let (remaining, string) = result.unwrap();
        assert_eq!(string, "hello world");
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_list_parsing() {
        let parser = SchemeParser::list();

        let result = parser.parse("()");
        assert!(result.is_ok());
        let (remaining, list) = result.unwrap();
        assert_eq!(list, vec![]);
        assert_eq!(remaining, "");

        let result = parser.parse("(1 2 3)");
        assert!(result.is_ok());
        let (remaining, list) = result.unwrap();
        assert_eq!(list.len(), 3);
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_s_expression_parsing() {
        let parser = SchemeParser::s_expression();

        let result = parser.parse("42");
        assert!(result.is_ok());
        let (remaining, sexp) = result.unwrap();
        assert_eq!(sexp, SchemeSexp::Atom(SchemeAtom::Integer(42)));
        assert_eq!(remaining, "");

        let result = parser.parse("'hello");
        assert!(result.is_ok());
        let (remaining, sexp) = result.unwrap();
        if let SchemeSexp::Quote(inner) = sexp {
            assert_eq!(*inner, SchemeSexp::Atom(SchemeAtom::Symbol("hello")));
        } else {
            panic!("Expected quoted expression");
        }
        assert_eq!(remaining, "");

        let result = parser.parse("(+ 1 2)");
        assert!(result.is_ok());
        let (remaining, sexp) = result.unwrap();
        if let SchemeSexp::List(elements) = sexp {
            assert_eq!(elements.len(), 3);
            assert_eq!(elements[0], SchemeSexp::Atom(SchemeAtom::Symbol("+")));
            assert_eq!(elements[1], SchemeSexp::Atom(SchemeAtom::Integer(1)));
            assert_eq!(elements[2], SchemeSexp::Atom(SchemeAtom::Integer(2)));
        } else {
            panic!("Expected list expression");
        }
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_dotted_pair_parsing() {
        let parser = SchemeParser::s_expression();

        let result = parser.parse("(a . b)");
        assert!(result.is_ok());
        let (remaining, sexp) = result.unwrap();
        if let SchemeSexp::DottedPair(first, second) = sexp {
            assert_eq!(*first, SchemeSexp::Atom(SchemeAtom::Symbol("a")));
            assert_eq!(*second, SchemeSexp::Atom(SchemeAtom::Symbol("b")));
        } else {
            panic!("Expected dotted pair");
        }
        assert_eq!(remaining, "");
    }

    #[test]
    fn test_optimized_batch_parsing() {
        let mut parser = optimized::OptimizedSExpressionParser::new();

        let result = parser.parse_batch("(+ 1 2) (* 3 4) 'hello");
        assert!(result.is_ok());
        let (remaining, expressions) = result.unwrap();
        assert_eq!(expressions.len(), 3);
        assert_eq!(remaining, "");
    }
}