use crate::{
parser::{code, predicate, prelude::*, Code, Input, ParseResult, Span},
value::Value,
};
use super::{
code::{
ERR_SYMBOL_CLOSING, ERR_SYMBOL_CODE, ERR_SYMBOL_ESCAPE, ERR_SYMBOL_LITERAL,
ERR_SYMBOL_SINGLE_DOT,
},
Sexpr, SexprError,
};
impl<'a, 'b, const ECO: Code> Sexpr<'a, ECO> {
    /// Returns a parser that reads one symbol and yields it as a [`Value`].
    ///
    /// The returned closure dispatches on the first character of the input:
    ///
    /// * an *initial* character (see `is_initial`) starts an ordinary
    ///   identifier made of *subsequent* characters;
    /// * `+` or `-` starts a peculiar identifier: the sign alone, the sign
    ///   followed by a sign-subsequent character, or the sign followed by
    ///   `.` and a dot-subsequent character;
    /// * `.` starts a peculiar identifier that needs at least one
    ///   dot-subsequent character after the dot;
    /// * `|` starts a delimited symbol whose text may contain escapes and is
    ///   closed by the next unescaped `|`.
    ///
    /// NOTE(review): the accepted forms look like the R7RS identifier
    /// grammar; confirm that this is the intended specification.
    ///
    /// Every successful branch creates the symbol through `self.m` and
    /// returns it together with the remaining input.
    ///
    /// # Errors
    ///
    /// * Any other first character: `ERR_SYMBOL_LITERAL + ECO` with
    ///   [`SexprError::SymbolLiteral`].
    /// * End of input: an end-of-input error combined with the same
    ///   `SymbolLiteral` entry.
    /// * `.` not followed by a dot-subsequent character: the underlying error
    ///   with `ERR_SYMBOL_SINGLE_DOT + ECO` pushed onto it.
    /// * A `|...` symbol whose failure code is `code::ERR_TERMINATED`: the
    ///   underlying error with `ERR_SYMBOL_CLOSING + ECO` pushed onto it.
    ///   Other failures (for example invalid escapes) are returned unchanged.
    // The `expect` calls below run only after the first character has already
    // been matched against the parser they guard, so they are not expected to
    // panic on any input; hence no `# Panics` section.
    #[allow(clippy::missing_panics_doc)]
    pub fn symbol(&self) -> impl Fn(Input<'b>) -> ParseResult<'b, Value<'a>> + '_ {
        let m = self.m.clone();
        move |input| match input.current() {
            // Ordinary identifier: one initial followed by subsequents.
            Some(ch) if Self::is_initial(ch) => {
                let (symbol, cursor) = take_while1(Self::is_subsequent)(input)
                    .expect("an initial character always satisfies `is_subsequent`");
                Ok((m.borrow_mut().new_symbol(symbol).into(), cursor))
            }
            // Peculiar identifier introduced by an explicit sign.
            Some('+' | '-') => {
                let mut parse = recognize(pair(
                    one_of("+-"),
                    opt(any((
                        // sign, sign-subsequent, subsequent*
                        map(
                            pair(
                                satisfy(Self::is_sign_subsequent),
                                take_while0(Self::is_subsequent),
                            ),
                            |_| (),
                        ),
                        // sign, '.', dot-subsequent, subsequent*
                        map(
                            tuple((
                                char('.'),
                                satisfy(Self::is_dot_subsequent),
                                take_while0(Self::is_subsequent),
                            )),
                            |_| (),
                        ),
                    ))),
                ));
                let (symbol, cursor) = parse(input)
                    .expect("input starts with a sign and the remainder is optional");
                Ok((m.borrow_mut().new_symbol(symbol).into(), cursor))
            }
            // Peculiar identifier introduced by a dot; a lone dot is rejected.
            Some('.') => {
                let mut parse = recognize(tuple((
                    char('.'),
                    satisfy(Self::is_dot_subsequent),
                    take_while0(Self::is_subsequent),
                )));
                let (symbol, cursor) = parse(input).map_err(|mut err| {
                    let span = err.span();
                    err.push(
                        span,
                        ERR_SYMBOL_SINGLE_DOT + ECO,
                        SexprError::SymbolSingleDot,
                    );
                    err
                })?;
                Ok((m.borrow_mut().new_symbol(symbol).into(), cursor))
            }
            // Delimited symbol: `|text|`.
            Some('|') => {
                let mut parse = delimited(char('|'), Self::symbol_text, char('|'));
                let (symbol, cursor) = parse(input).map_err(|mut err| {
                    let span = err.span();
                    // Only an `ERR_TERMINATED` failure is relabelled as a
                    // missing closing bar; escape errors keep their own code.
                    if err.code() == code::ERR_TERMINATED {
                        err.push(span, ERR_SYMBOL_CLOSING + ECO, SexprError::SymbolClosing);
                    }
                    err
                })?;
                Ok((m.borrow_mut().new_symbol(symbol).into(), cursor))
            }
            Some(_) => Err(ParseError::new(
                input.position(),
                ERR_SYMBOL_LITERAL + ECO,
                SexprError::SymbolLiteral,
            )),
            None => Err(ParseError::eof(input.position()).and(
                input.position(),
                ERR_SYMBOL_LITERAL + ECO,
                SexprError::SymbolLiteral,
            )),
        }
    }

    /// Collects the text of a `|`-delimited symbol, decoding escapes.
    ///
    /// Reading stops in front of the first unescaped `|` (which is left in
    /// the returned input) or at the end of the input. Running out of input
    /// is not an error here; the caller's closing-delimiter parser reports it.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `symbol_escape`.
    fn symbol_text(input: Input<'b>) -> ParseResult<'b, String> {
        let mut cursor = input;
        let mut text = String::new();
        while let Some(ch) = cursor.current() {
            match ch {
                '\\' => {
                    let (ch, rest) = Self::symbol_escape(cursor)?;
                    text.push(ch);
                    cursor = rest;
                }
                '|' => break,
                _ => {
                    text.push(ch);
                    cursor.advance();
                }
            }
        }
        Ok((text, cursor))
    }

    /// Parses one backslash escape at the start of `input`.
    ///
    /// Recognised escapes are `\a`, `\b`, `\t`, `\n`, `\r`, `\\`, `\|` and
    /// `\x<code>;`, where `<code>` is whatever `Self::character_code` accepts.
    /// On success the decoded character and the input following the escape
    /// are returned.
    ///
    /// # Errors
    ///
    /// * Unknown escape letter, or input that does not start with `\`:
    ///   `ERR_SYMBOL_ESCAPE + ECO` with [`SexprError::SymbolEscape`].
    /// * End of input (before or right after the backslash): an end-of-input
    ///   error combined with the same `SymbolEscape` entry.
    /// * A malformed `\x...;` sequence: `ERR_SYMBOL_CODE + ECO` with
    ///   [`SexprError::SymbolCode`].
    fn symbol_escape(input: Input<'b>) -> ParseResult<'b, char> {
        let mut cursor = input;
        match cursor.current() {
            Some('\\') => {
                // `advance` steps past the backslash and yields the escape letter.
                let escape = match cursor.advance() {
                    Some('a') => '\x07',
                    Some('b') => '\x08',
                    Some('t') => '\t',
                    Some('n') => '\n',
                    Some('r') => '\r',
                    Some('\\') => '\\',
                    Some('|') => '|',
                    Some('x') => {
                        // Skip the `x`; the character code and `;` follow.
                        cursor.advance();
                        let (ch, cursor) = context_as(
                            terminated(Self::character_code, char(';')),
                            ERR_SYMBOL_CODE + ECO,
                            SexprError::SymbolCode,
                        )(cursor)?;
                        return Ok((ch, cursor));
                    }
                    Some(_) => {
                        // The span runs from the backslash to the bad letter.
                        return Err(ParseError::new(
                            Span::new(input.position(), cursor.position()),
                            ERR_SYMBOL_ESCAPE + ECO,
                            SexprError::SymbolEscape,
                        ));
                    }
                    None => {
                        // Symbol-specific error kind, matching the
                        // `ERR_SYMBOL_ESCAPE` code and the other branches.
                        return Err(ParseError::eof(cursor.position()).and(
                            cursor.position(),
                            ERR_SYMBOL_ESCAPE + ECO,
                            SexprError::SymbolEscape,
                        ));
                    }
                };
                // Step past the single-letter escape that was just decoded.
                cursor.advance();
                Ok((escape, cursor))
            }
            Some(_) => Err(ParseError::new(
                input.position(),
                ERR_SYMBOL_ESCAPE + ECO,
                SexprError::SymbolEscape,
            )),
            None => Err(ParseError::eof(cursor.position()).and(
                cursor.position(),
                ERR_SYMBOL_ESCAPE + ECO,
                SexprError::SymbolEscape,
            )),
        }
    }

    /// Returns `true` if `ch` may start an ordinary identifier: an alphabetic
    /// character (per `predicate::is_alpha`) or one of
    /// `! $ % & * / : < = > ? ^ _ ~`.
    fn is_initial(ch: char) -> bool {
        predicate::is_alpha(ch)
            || matches!(
                ch,
                '!' | '$' | '%' | '&' | '*' | '/' | ':' | '<' | '=' | '>' | '?' | '^' | '_' | '~'
            )
    }

    /// Returns `true` if `ch` may continue an identifier: any initial
    /// character, a digit (per `predicate::is_digit`), or one of `+ - . @`.
    fn is_subsequent(ch: char) -> bool {
        Self::is_initial(ch) || predicate::is_digit(ch) || matches!(ch, '+' | '-' | '.' | '@')
    }

    /// Returns `true` if `ch` may directly follow a leading sign: any initial
    /// character or one of `+ - @`.
    fn is_sign_subsequent(ch: char) -> bool {
        Self::is_initial(ch) || matches!(ch, '+' | '-' | '@')
    }

    /// Returns `true` if `ch` may directly follow a leading dot: any
    /// sign-subsequent character or another `.`.
    fn is_dot_subsequent(ch: char) -> bool {
        Self::is_sign_subsequent(ch) || ch == '.'
    }
}
#[cfg(test)]
mod tests {
    use crate::{mem::Mutator, Position};
    use super::*;

    /// Escape fixtures: (source text, unconsumed remainder, decoded character).
    const ESCAPES: &[(&str, &str, char)] = &[
        // Escape is the whole input.
        (r"\a", "", '\x07'),
        (r"\b", "", '\x08'),
        (r"\t", "", '\t'),
        (r"\n", "", '\n'),
        (r"\r", "", '\r'),
        (r"\\", "", '\\'),
        (r"\|", "", '|'),
        (r"\x20;", "", ' '),
        // Escape followed by a space that must be left unconsumed.
        (r"\a ", " ", '\x07'),
        (r"\b ", " ", '\x08'),
        (r"\t ", " ", '\t'),
        (r"\n ", " ", '\n'),
        (r"\r ", " ", '\r'),
        (r"\\ ", " ", '\\'),
        (r"\| ", " ", '|'),
        (r"\x20; ", " ", ' '),
    ];

    /// Every fixture in `ESCAPES` decodes to its character and remainder.
    #[test]
    fn symbol_escape_success() {
        for (index, (source, remainder, decoded)) in ESCAPES.iter().enumerate() {
            let wanted = Ok((*decoded, Input::new(remainder)));
            assert_eq!(
                Sexpr::<0>::symbol_escape(Input::new(source)),
                wanted,
                "symbol escape {} failed",
                index + 1
            );
        }
    }

    /// Malformed escapes are rejected with the matching code and location.
    #[test]
    fn symbol_escape_failure() {
        // Character code without the terminating `;`.
        let wanted = Err(ParseError::new(
            Span::new(Position::new(2, 1, 3), Position::new(4, 1, 5)),
            ERR_SYMBOL_CODE,
            SexprError::SymbolCode,
        ));
        assert_eq!(Sexpr::<0>::symbol_escape(Input::new(r"\x20")), wanted);

        // `g` is rejected where the character code is expected.
        let wanted = Err(ParseError::new(
            Position::new(2, 1, 3),
            ERR_SYMBOL_CODE,
            SexprError::SymbolCode,
        ));
        assert_eq!(Sexpr::<0>::symbol_escape(Input::new(r"\xg;")), wanted);

        // Unknown escape letter.
        let wanted = Err(ParseError::new(
            Span::new(Position::new(0, 1, 1), Position::new(1, 1, 2)),
            ERR_SYMBOL_ESCAPE,
            SexprError::SymbolEscape,
        ));
        assert_eq!(Sexpr::<0>::symbol_escape(Input::new(r"\g")), wanted);

        // A double quote is not a symbol escape.
        let wanted = Err(ParseError::new(
            Span::new(Position::new(0, 1, 1), Position::new(1, 1, 2)),
            ERR_SYMBOL_ESCAPE,
            SexprError::SymbolEscape,
        ));
        assert_eq!(Sexpr::<0>::symbol_escape(Input::new(r#"\""#)), wanted);
    }

    /// Symbol fixtures: (source text, unconsumed remainder, symbol name).
    const SYMBOLS: &[(&str, &str, &str)] = &[
        // Symbol is the whole input.
        (r"hello", "", "hello"),
        (r"...", "", "..."),
        (r"..", "", ".."),
        (r"+", "", "+"),
        (r"+soup+", "", "+soup+"),
        (r"<=?", "", "<=?"),
        (r"->string", "", "->string"),
        (r"a34kTMNs", "", "a34kTMNs"),
        (r"lambda", "", "lambda"),
        (r"list->vector", "", "list->vector"),
        (r"q", "", "q"),
        (r"V17a", "", "V17a"),
        (
            r"the-word-recursion-has-many-meanings",
            "",
            "the-word-recursion-has-many-meanings",
        ),
        (r"|two words|", "", "two words"),
        (r"|two\x20;words|", "", "two words"),
        (r"|two\nwords|", "", "two\nwords"),
        (r"|two\|words|", "", "two|words"),
        // Symbol followed by a space that must be left unconsumed.
        (r"hello ", " ", "hello"),
        (r"... ", " ", "..."),
        (r".. ", " ", ".."),
        (r"+ ", " ", "+"),
        (r"+soup+ ", " ", "+soup+"),
        (r"<=? ", " ", "<=?"),
        (r"->string ", " ", "->string"),
        (r"a34kTMNs ", " ", "a34kTMNs"),
        (r"lambda ", " ", "lambda"),
        (r"|two words| ", " ", "two words"),
    ];

    /// Every fixture in `SYMBOLS` parses to the expected symbol and remainder.
    #[test]
    fn symbol_success() {
        let mutator = Mutator::new_ref();
        let sexpr = Sexpr::<0>::new(mutator.clone());
        for (index, (source, remainder, name)) in SYMBOLS.iter().enumerate() {
            // Build the expected value first so its mutable borrow of the
            // mutator has ended before the parser runs.
            let wanted = Ok((
                mutator.borrow_mut().new_symbol(name).into(),
                Input::new(remainder),
            ));
            assert_eq!(
                sexpr.symbol()(Input::new(source)),
                wanted,
                "symbol {} failed",
                index + 1
            );
        }
    }

    /// Inputs that are not symbols fail with the matching code and location.
    #[test]
    fn symbol_failure() {
        let sexpr = Sexpr::<0>::new(Mutator::new_ref());

        // Empty input.
        let wanted = Err(ParseError::new(
            Position::new(0, 1, 1),
            ERR_SYMBOL_LITERAL,
            SexprError::SymbolLiteral,
        ));
        assert_eq!(sexpr.symbol()(Input::new(r"")), wanted);

        // Missing closing `|`.
        let wanted = Err(ParseError::new(
            Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
            ERR_SYMBOL_CLOSING,
            SexprError::SymbolClosing,
        ));
        assert_eq!(sexpr.symbol()(Input::new(r"|two words")), wanted);

        // Unknown escape inside a delimited symbol.
        let wanted = Err(ParseError::new(
            Span::new(Position::new(4, 1, 5), Position::new(5, 1, 6)),
            ERR_SYMBOL_ESCAPE,
            SexprError::SymbolEscape,
        ));
        assert_eq!(sexpr.symbol()(Input::new(r#"|two\" words|"#)), wanted);

        // Character code without the terminating `;`.
        let wanted = Err(ParseError::new(
            Span::new(Position::new(6, 1, 7), Position::new(8, 1, 9)),
            ERR_SYMBOL_CODE,
            SexprError::SymbolCode,
        ));
        assert_eq!(sexpr.symbol()(Input::new(r"|two\x20 words|")), wanted);

        // `g` is rejected where the character code is expected.
        let wanted = Err(ParseError::new(
            Position::new(6, 1, 7),
            ERR_SYMBOL_CODE,
            SexprError::SymbolCode,
        ));
        assert_eq!(sexpr.symbol()(Input::new(r"|two\xg; words|")), wanted);

        // A lone dot is not a symbol.
        let wanted = Err(ParseError::new(
            Position::new(0, 1, 1),
            ERR_SYMBOL_SINGLE_DOT,
            SexprError::SymbolSingleDot,
        ));
        assert_eq!(sexpr.symbol()(Input::new(r".")), wanted);
    }
}