#![allow(clippy::enum_variant_names)]
use super::string_helpers;
use crate::value::{BitSequence, Composite, Primitive, Value, Variant};
use std::num::ParseIntError;
use yap::{types::StrTokens, IntoTokens, TokenLocation, Tokens};
/// A builder that configures how a [`Value`] is parsed from a string.
///
/// Custom parsers registered on the builder are tried, in registration order,
/// before any of the built-in value parsers.
pub struct FromStrBuilder {
// Custom parsers in the order they were added.
custom_parsers: Vec<CustomParser>,
}
/// A boxed custom parsing function.
///
/// It is handed a mutable reference to the not-yet-parsed input and returns `None`
/// to decline, or `Some(result)` with the outcome of its parse attempt.
type CustomParser = Box<dyn Fn(&mut &str) -> Option<Result<Value<()>, ParseError>> + 'static>;
impl FromStrBuilder {
pub(crate) fn new() -> Self {
FromStrBuilder { custom_parsers: Vec::new() }
}
pub fn add_custom_parser<F>(mut self, f: F) -> Self
where
F: Fn(&mut &str) -> Option<Result<Value<()>, ParseError>> + 'static,
{
self.custom_parsers.push(Box::new(f));
self
}
pub fn parse<'a>(&self, s: &'a str) -> (Result<Value<()>, ParseError>, &'a str) {
let mut tokens = s.into_tokens();
let res = parse_value(&mut tokens, &self.custom_parsers);
let remaining = tokens.remaining();
(res, remaining)
}
}
/// An error produced while parsing a value from a string, together with the
/// location in the input that it relates to.
#[derive(Debug, thiserror::Error)]
pub struct ParseError {
/// Offset into the input at which the error starts.
pub start_loc: usize,
/// Offset into the input at which the error ends, when a span is known.
pub end_loc: Option<usize>,
/// What went wrong.
pub err: ParseErrorKind,
}
impl ParseError {
pub fn new_at<E: Into<ParseErrorKind>>(err: E, loc: usize) -> Self {
Self { start_loc: loc, end_loc: None, err: err.into() }
}
pub fn new_between<E: Into<ParseErrorKind>>(err: E, start: usize, end: usize) -> Self {
Self { start_loc: start, end_loc: Some(end), err: err.into() }
}
}
impl std::fmt::Display for ParseError {
    /// Render the error prefixed by its location: a span when an end location
    /// is present, otherwise just the start location.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.end_loc {
            Some(end_loc) => {
                write!(f, "Error from char {} to {}: {}", self.start_loc, end_loc, self.err)
            }
            None => write!(f, "Error at char {}: {}", self.start_loc, self.err),
        }
    }
}
// Generates location helpers on an error type so that an error value can be
// turned into a located `ParseError` with a single method call.
macro_rules! at_between {
    ($err_ty:ident) => {
        impl $err_ty {
            /// Locate this error at a single offset (no end location).
            pub fn at(self, loc: usize) -> ParseError {
                ParseError::new_at(self, loc)
            }

            /// Locate this error on the span from `loc` to `loc + 1`.
            pub fn at_one(self, loc: usize) -> ParseError {
                let end = loc + 1;
                ParseError::new_between(self, loc, end)
            }

            /// Locate this error on the span from `start` to `end`.
            pub fn between(self, start: usize, end: usize) -> ParseError {
                ParseError::new_between(self, start, end)
            }
        }
    };
}
/// The kinds of error that can occur while parsing a value from a string.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum ParseErrorKind {
// None of the value parsers matched the input.
#[error("Expected a value")]
ExpectedValue,
// Error parsing a composite or variant.
#[error("{0}")]
Complex(#[from] ParseComplexError),
// Error parsing a character literal.
#[error("{0}")]
Char(#[from] ParseCharError),
// Error parsing a string literal.
#[error("{0}")]
String(#[from] ParseStringError),
// Error parsing a number.
#[error("{0}")]
Number(#[from] ParseNumberError),
// Error parsing a bit sequence.
#[error("{0}")]
BitSequence(#[from] ParseBitSequenceError),
// Arbitrary error produced by a custom parser. There is no `#[from]` here;
// use `ParseErrorKind::custom` to construct it.
#[error("{0}")]
Custom(ParseCustomError),
}
// Adds the `at`, `at_one` and `between` location helpers.
at_between!(ParseErrorKind);
impl ParseErrorKind {
pub fn custom<E: Into<ParseCustomError>>(e: E) -> Self {
ParseErrorKind::Custom(e.into())
}
}
/// The boxed, thread-safe error type carried by [`ParseErrorKind::Custom`].
pub type ParseCustomError = Box<dyn std::error::Error + Send + Sync + 'static>;
/// Errors that can occur while parsing composites, variants and their field names.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[allow(missing_docs)]
pub enum ParseComplexError {
// An identifier did not begin with an alphabetic character.
#[error("The first character in a field name should be alphabetic")]
InvalidStartingCharacterInIdent,
// A field name could not be parsed as either a string or an identifier.
#[error("Field name is not valid (it should begin with an alphabetical char and then consist only of alphanumeric chars)")]
InvalidFieldName,
// The given separator character was expected between a field name and its value.
#[error("Missing field separator; expected {0}")]
MissingFieldSeparator(char),
// The given closing character was expected; the `usize` is the offset of the
// matching opener (it is not included in the rendered message).
#[error("Missing closing '{0}'")]
ExpectedCloserToMatch(char, usize),
}
// Adds the `at`, `at_one` and `between` location helpers.
at_between!(ParseComplexError);
/// Errors that can occur while parsing a character literal.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[allow(missing_docs)]
pub enum ParseCharError {
// The input ended where the character itself was expected.
#[error("Expected a single character")]
ExpectedValidCharacter,
// A backslash was not followed by a recognised escape code.
#[error("Expected an escape code to follow the '\\'")]
ExpectedValidEscapeCode,
// The closing quote is missing; the `usize` is the offset of the opening quote.
#[error("Expected a closing quote to match the opening quote at position {0}")]
ExpectedClosingQuoteToMatch(usize),
}
// Adds the `at`, `at_one` and `between` location helpers.
at_between!(ParseCharError);
/// Errors that can occur while parsing a string literal.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[allow(missing_docs)]
pub enum ParseStringError {
// The input ended before a closing quote; the `usize` is the offset of the opening quote.
#[error("Expected a closing quote to match the opening quote at position {0}")]
ExpectedClosingQuoteToMatch(usize),
// A backslash was not followed by a recognised escape code.
#[error("Expected an escape code to follow the '\\'")]
ExpectedValidEscapeCode,
}
// Adds the `at`, `at_one` and `between` location helpers.
at_between!(ParseStringError);
/// Errors that can occur while parsing a number.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[allow(missing_docs)]
pub enum ParseNumberError {
// A sign was seen but no digit followed it.
#[error("Expected one or more digits")]
ExpectedDigit,
// The collected digits could not be converted into an integer.
#[error("Failed to parse digits into an integer: {0}")]
ParsingFailed(ParseIntError),
}
// Adds the `at`, `at_one` and `between` location helpers.
at_between!(ParseNumberError);
/// Errors that can occur while parsing a bit sequence.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[allow(missing_docs)]
pub enum ParseBitSequenceError {
// The closing '>' is missing; the `usize` is the offset of the opening '<'.
#[error("Expected a closing bracket ('>') to match the opening one at position {0}")]
ExpectedClosingBracketToMatch(usize),
// NOTE(review): this variant is not constructed anywhere in the visible code.
#[error("Invalid character; expecting a 0 or 1")]
InvalidCharacter,
}
// Adds the `at`, `at_one` and `between` location helpers.
at_between!(ParseBitSequenceError);
/// Parse a single [`Value`] from the front of `t`.
///
/// Custom parsers are tried first, in registration order. The first one that
/// returns `Some(..)` decides the outcome:
/// - on `Some(Ok(value))`, `t` is advanced past exactly the input the parser consumed;
/// - on `Some(Err(e))`, the error is returned with its locations shifted so that they
///   are relative to the whole input rather than to the slice the parser saw. `t` is
///   left untouched.
///
/// If every custom parser declines (or there are none), the built-in parsers are tried
/// in turn. When none of them match either, an `ExpectedValue` error located at the
/// current offset is returned.
fn parse_value(
    t: &mut StrTokens,
    custom_parsers: &[CustomParser],
) -> Result<Value<()>, ParseError> {
    if !custom_parsers.is_empty() {
        let s = t.remaining();
        let start_offset = t.offset();

        for parser in custom_parsers {
            // Every parser gets a fresh view of the remaining input, so a parser that
            // advances the cursor and then declines cannot affect the parsers after it.
            let mut cursor = s;
            match parser(&mut cursor) {
                None => continue,
                Some(Ok(value)) => {
                    // Wind `t` forward rather than replacing it, so that offsets stay
                    // relative to the full input. The amount consumed is measured in
                    // bytes while `t.next()` advances a whole char at a time, so step
                    // until the target byte offset is reached instead of calling
                    // `next()` once per consumed byte.
                    let consumed = s.len().saturating_sub(cursor.len());
                    let target = start_offset + consumed;
                    while t.offset() < target {
                        if t.next().is_none() {
                            break;
                        }
                    }
                    return Ok(value);
                }
                Some(Err(e)) => {
                    // The parser reports locations relative to the slice it was given;
                    // translate them into locations in the full input.
                    return Err(ParseError {
                        start_loc: start_offset + e.start_loc,
                        end_loc: e.end_loc.map(|l| start_offset + l),
                        err: e.err,
                    });
                }
            }
        }
    }

    // `one_of!` tries each parser in turn and yields the first `Some(..)` result.
    let val = yap::one_of!(t;
        transpose_err(parse_bool(t).map(Value::bool).ok_or(None)),
        transpose_err(parse_char(t).map(Value::char)),
        transpose_err(parse_string(t).map(Value::string)),
        transpose_err(parse_number(t).map(Value::primitive)),
        transpose_err(parse_named_composite(t, custom_parsers).map(|v| v.into())),
        transpose_err(parse_unnamed_composite(t, custom_parsers).map(|v| v.into())),
        transpose_err(parse_bit_sequence(t).map(Value::bit_sequence)),
        transpose_err(parse_variant(t, custom_parsers).map(|v| v.into())),
    );

    match val {
        Some(Ok(val)) => Ok(val),
        Some(Err(e)) => Err(e),
        None => Err(ParseError::new_at(ParseErrorKind::ExpectedValue, t.offset())),
    }
}
/// Parse a named composite of the form `{ name: value, ... }`.
///
/// Returns `Err(None)` when the input does not begin with `{`, and `Err(Some(..))`
/// when it does but the contents or the closing `}` are invalid.
fn parse_named_composite(
    t: &mut StrTokens,
    custom_parsers: &[CustomParser],
) -> Result<Composite<()>, Option<ParseError>> {
    let open_pos = t.offset();
    if !t.token('{') {
        return Err(None);
    }

    // An immediately closed brace pair (ignoring whitespace) is an empty composite.
    skip_whitespace(t);
    if t.token('}') {
        return Ok(Composite::Named(Vec::new()));
    }

    let fields = t
        .sep_by_err(
            |t| parse_field_name_and_value(t, custom_parsers),
            |t| skip_spaced_separator(t, ','),
        )
        .collect::<Result<_, _>>()?;

    skip_whitespace(t);
    if t.token('}') {
        Ok(Composite::Named(fields))
    } else {
        Err(Some(ParseComplexError::ExpectedCloserToMatch('}', open_pos).at_one(t.offset())))
    }
}
/// Parse an unnamed composite of the form `( value, ... )`.
///
/// Returns `Err(None)` when the input does not begin with `(`, and `Err(Some(..))`
/// when it does but the contents or the closing `)` are invalid.
fn parse_unnamed_composite(
    t: &mut StrTokens,
    custom_parsers: &[CustomParser],
) -> Result<Composite<()>, Option<ParseError>> {
    let open_pos = t.offset();
    if !t.token('(') {
        return Err(None);
    }

    // An immediately closed bracket pair (ignoring whitespace) is an empty composite.
    skip_whitespace(t);
    if t.token(')') {
        return Ok(Composite::Unnamed(Vec::new()));
    }

    let items = t
        .sep_by_err(|t| parse_value(t, custom_parsers), |t| skip_spaced_separator(t, ','))
        .collect::<Result<_, _>>()?;

    skip_whitespace(t);
    if t.token(')') {
        Ok(Composite::Unnamed(items))
    } else {
        Err(Some(ParseComplexError::ExpectedCloserToMatch(')', open_pos).at_one(t.offset())))
    }
}
/// Parse a variant: a variant name followed by a named or unnamed composite.
///
/// Returns `Err(None)` when no variant name is found or when no composite follows it.
fn parse_variant(
    t: &mut StrTokens,
    custom_parsers: &[CustomParser],
) -> Result<Variant<()>, Option<ParseError>> {
    let name = parse_optional_variant_ident(t).ok_or(None::<ParseError>)?;

    skip_whitespace(t);

    let body = yap::one_of!(t;
        transpose_err(parse_named_composite(t, custom_parsers)),
        transpose_err(parse_unnamed_composite(t, custom_parsers))
    );

    match body {
        None => Err(None),
        Some(parsed) => parsed.map(|values| Variant { name, values }).map_err(Some),
    }
}
/// Parse a bit sequence of the form `<0101...>`.
///
/// Returns `Err(None)` when the input does not begin with `<`, and an
/// `ExpectedClosingBracketToMatch` error when the run of `0`/`1` characters is not
/// followed by `>`.
fn parse_bit_sequence(t: &mut StrTokens) -> Result<BitSequence, Option<ParseError>> {
    let open_pos = t.offset();
    if !t.token('<') {
        return Err(None);
    }

    let mut seq = BitSequence::new();
    for digit in t.tokens_while(|c| matches!(*c, '0' | '1')) {
        seq.push(digit == '1');
    }

    if t.token('>') {
        Ok(seq)
    } else {
        let at = t.offset();
        Err(Some(ParseBitSequenceError::ExpectedClosingBracketToMatch(open_pos).between(at, at + 1)))
    }
}
/// Parse the literal `true` or `false`, returning `None` if neither is next.
fn parse_bool(t: &mut StrTokens) -> Option<bool> {
    if t.tokens("true".chars()) {
        return Some(true);
    }
    if t.tokens("false".chars()) {
        return Some(false);
    }
    None
}
/// Parse a single-quoted character literal such as `'a'` or `'\n'`.
///
/// Returns `Err(None)` when the input does not begin with `'`. Once the opening quote
/// has been seen, any problem is reported as `Err(Some(..))`:
/// - `ExpectedValidCharacter` if the input ends right after the opening quote;
/// - `ExpectedValidEscapeCode` if a backslash is followed by nothing or by an
///   unrecognised escape code;
/// - `ExpectedClosingQuoteToMatch` if the closing quote is missing.
fn parse_char(t: &mut StrTokens) -> Result<char, Option<ParseError>> {
    let start = t.offset();
    if !t.token('\'') {
        return Err(None);
    }
    let char = match t.next() {
        None => return Err(Some(ParseCharError::ExpectedValidCharacter.at_one(t.offset()))),
        Some(c) => c,
    };

    // A backslash introduces an escape code, which is translated into the char it
    // stands for.
    let char = if char == '\\' {
        // Remember where the escape code starts so that an invalid code is reported
        // at its own location rather than at whatever follows it. This matches how
        // string literals report invalid escape codes.
        let escape_pos = t.offset();
        let escape_code = match t.next() {
            None => return Err(Some(ParseCharError::ExpectedValidEscapeCode.at_one(t.offset()))),
            Some(c) => c,
        };
        match string_helpers::from_escape_code(escape_code) {
            None => {
                return Err(Some(
                    ParseCharError::ExpectedValidEscapeCode.between(escape_pos, t.offset()),
                ))
            }
            Some(c) => c,
        }
    } else {
        char
    };

    if !t.token('\'') {
        return Err(Some(ParseCharError::ExpectedClosingQuoteToMatch(start).at_one(t.offset())));
    }
    Ok(char)
}
/// Parse an integer with an optional leading `+` or `-`.
///
/// Underscores are accepted as separators once at least one digit has been seen, and
/// are dropped before conversion. Unsigned and `+`-prefixed numbers become a `u128`
/// primitive; `-`-prefixed numbers become an `i128` primitive.
///
/// Returns `Err(None)` when nothing at all was consumed, `ExpectedDigit` when a sign
/// was consumed but no digit followed, and `ParsingFailed` when the digits do not fit
/// the target integer type.
fn parse_number(t: &mut StrTokens) -> Result<Primitive, Option<ParseError>> {
    let start_loc = t.offset();

    // Positive if there is an explicit '+', or if there is no '-' to consume.
    let is_positive = t.token('+') || !t.token('-');

    let mut n_str = String::new();
    if !is_positive {
        n_str.push('-');
    }

    let mut seen_digit = false;
    n_str.extend(
        t.tokens_while(|c| {
            if c.is_ascii_digit() {
                seen_digit = true;
                true
            } else {
                // Underscores only count as part of the number after a digit.
                seen_digit && *c == '_'
            }
        })
        .filter(|c| c.is_ascii_digit()),
    );

    let end_loc = t.offset();

    // Neither a sign nor a digit was consumed, so this is not a number at all.
    if end_loc == start_loc {
        return Err(None);
    }
    // A sign was consumed, but no digits followed it.
    if !seen_digit {
        return Err(Some(ParseNumberError::ExpectedDigit.between(end_loc, end_loc + 1)));
    }

    let parsed = if is_positive {
        n_str.parse::<u128>().map(Primitive::u128)
    } else {
        n_str.parse::<i128>().map(Primitive::i128)
    };
    parsed.map_err(|e| Some(ParseNumberError::ParsingFailed(e).between(start_loc, end_loc)))
}
/// Parse a double-quoted string literal, translating backslash escape codes.
///
/// Returns `Err(None)` when the input does not begin with `"`. After the opening quote,
/// running out of input yields `ExpectedClosingQuoteToMatch`, and a backslash followed
/// by an unrecognised escape code yields `ExpectedValidEscapeCode` located on that code.
fn parse_string(t: &mut StrTokens) -> Result<String, Option<ParseError>> {
    let start = t.offset();
    if !t.token('"') {
        return Err(None);
    }

    let mut out = String::new();
    loop {
        match t.next() {
            // The input ended before the closing quote.
            None => {
                return Err(Some(
                    ParseStringError::ExpectedClosingQuoteToMatch(start).at_one(t.offset()),
                ))
            }
            // An unescaped quote terminates the string.
            Some('"') => return Ok(out),
            // A backslash escapes the token after it; handle that token right away.
            Some('\\') => {
                let code_pos = t.offset();
                let code = match t.next() {
                    Some(code) => code,
                    None => {
                        return Err(Some(
                            ParseStringError::ExpectedClosingQuoteToMatch(start)
                                .at_one(t.offset()),
                        ))
                    }
                };
                match string_helpers::from_escape_code(code) {
                    Some(unescaped) => out.push(unescaped),
                    None => {
                        return Err(Some(
                            ParseStringError::ExpectedValidEscapeCode
                                .between(code_pos, code_pos + 1),
                        ))
                    }
                }
            }
            Some(other) => out.push(other),
        }
    }
}
/// Parse one `name: value` entry of a named composite.
///
/// Fails with `MissingFieldSeparator(':')` if the name is not followed by a colon.
fn parse_field_name_and_value(
    t: &mut StrTokens,
    custom_parsers: &[CustomParser],
) -> Result<(String, Value<()>), ParseError> {
    let name = parse_field_name(t)?;

    if skip_spaced_separator(t, ':') {
        parse_value(t, custom_parsers).map(|value| (name, value))
    } else {
        Err(ParseComplexError::MissingFieldSeparator(':').at_one(t.offset()))
    }
}
/// Parse a field name, which is either a quoted string or a bare identifier.
fn parse_field_name(t: &mut StrTokens) -> Result<String, ParseError> {
    // The identifier branch always yields `Some(..)`, so when the input is not a
    // quoted string the result (or error) of `parse_ident` is what gets returned.
    let parsed = yap::one_of!(t;
        transpose_err(parse_string(t)),
        Some(parse_ident(t)),
    );

    parsed.unwrap_or_else(|| Err(ParseComplexError::InvalidFieldName.at(t.offset())))
}
/// Parse a variant name, if one is present.
///
/// A name is either a `v`-prefixed string literal (for example `v"variant name"`)
/// or a bare identifier.
fn parse_optional_variant_ident(t: &mut StrTokens) -> Option<String> {
    // A string literal immediately preceded by the character 'v'.
    fn parse_prefixed_string(t: &mut StrTokens) -> Option<String> {
        match t.next() {
            Some('v') => parse_string(t).ok(),
            _ => None,
        }
    }

    yap::one_of!(t;
        parse_prefixed_string(t),
        parse_ident(t).ok()
    )
}
/// Parse an identifier: one or more alphabetic characters, followed by any number of
/// alphanumeric characters or underscores.
///
/// Fails with `InvalidStartingCharacterInIdent` if the first character is not alphabetic.
fn parse_ident(t: &mut StrTokens) -> Result<String, ParseError> {
    let start = t.location();

    let leading_alphabetic = t.skip_tokens_while(|c| c.is_alphabetic());
    if leading_alphabetic == 0 {
        return Err(ParseComplexError::InvalidStartingCharacterInIdent.at_one(start.offset()));
    }
    t.skip_tokens_while(|c| c.is_alphanumeric() || *c == '_');

    // Everything between the two locations is the identifier.
    let end = t.location();
    Ok(t.slice(start, end).as_iter().collect())
}
/// Advance past any whitespace at the current position.
fn skip_whitespace(t: &mut StrTokens) {
    let _skipped = t.skip_tokens_while(|ch| char::is_whitespace(*ch));
}
/// Skip optional whitespace, then the separator `s` if present, then more whitespace.
///
/// Returns whether the separator was found. Whitespace is skipped either way.
fn skip_spaced_separator(t: &mut StrTokens, s: char) -> bool {
    skip_whitespace(t);
    let found = t.token(s);
    skip_whitespace(t);
    found
}
/// Turn a result with an optional error into an optional result.
///
/// `Err(None)` becomes `None`; every other input is wrapped in `Some`.
fn transpose_err<T, E>(r: Result<T, Option<E>>) -> Option<Result<T, E>> {
    match r {
        Ok(val) => Some(Ok(val)),
        Err(maybe_err) => maybe_err.map(Err),
    }
}
// Unit tests for the string parser.
#[cfg(test)]
mod test {
use super::*;
// Wrapper around `ParseError` so that errors can be compared in assertions.
#[derive(Debug)]
pub struct E(ParseError);
impl From<ParseError> for E {
fn from(value: ParseError) -> Self {
E(value)
}
}
impl PartialEq for E {
fn eq(&self, other: &Self) -> bool {
let (a, b) = (&self.0, &other.0);
// Locations must match before the error kinds are compared.
if (a.start_loc, a.end_loc) != (b.start_loc, b.end_loc) {
return false;
}
match (&a.err, &b.err) {
(ParseErrorKind::String(a), ParseErrorKind::String(b)) => a == b,
(ParseErrorKind::Char(a), ParseErrorKind::Char(b)) => a == b,
(ParseErrorKind::Number(a), ParseErrorKind::Number(b)) => a == b,
// Custom errors are boxed trait objects, so they are compared by their rendered message.
(ParseErrorKind::Custom(a), ParseErrorKind::Custom(b)) => {
a.to_string() == b.to_string()
}
// Any other pairing (including mismatched kinds) is not supported and panics.
_ => {
panic!("PartialEq not implemented for these variants yet.")
}
}
}
}
// Parse `s` with a default builder, asserting on success that all input was consumed.
fn from(s: &str) -> Result<Value<()>, E> {
let (res, remaining) = FromStrBuilder::new().parse(s);
match res {
Ok(value) => {
assert_eq!(remaining.len(), 0, "was not expecting any unparsed output");
Ok(value)
}
Err(e) => Err(E(e)),
}
}
#[test]
fn parse_bools() {
assert_eq!(from("true"), Ok(Value::bool(true)));
assert_eq!(from("false"), Ok(Value::bool(false)));
}
#[test]
fn parse_numbers() {
assert_eq!(from("123"), Ok(Value::u128(123)));
assert_eq!(from("1_234_56"), Ok(Value::u128(123_456)));
assert_eq!(from("+1_234_56"), Ok(Value::u128(123_456)));
assert_eq!(from("-123_4"), Ok(Value::i128(-1234)));
// A sign with no digits after it is an error located just after the sign.
assert_eq!(from("-abc"), Err(E(ParseNumberError::ExpectedDigit.between(1, 2))));
}
#[test]
fn parse_chars() {
assert_eq!(from("'a'"), Ok(Value::char('a')));
assert_eq!(from("'😀'"), Ok(Value::char('😀')));
assert_eq!(from("'\\n'"), Ok(Value::char('\n')));
assert_eq!(from("'\\t'"), Ok(Value::char('\t')));
assert_eq!(from("'\\\"'"), Ok(Value::char('"')));
assert_eq!(from("'\\\''"), Ok(Value::char('\'')));
assert_eq!(from("'\\r'"), Ok(Value::char('\r')));
assert_eq!(from("'\\\\'"), Ok(Value::char('\\')));
assert_eq!(from("'\\0'"), Ok(Value::char('\0')));
// Missing closing quote.
assert_eq!(from("'a"), Err(E(ParseCharError::ExpectedClosingQuoteToMatch(0).at_one(2))));
}
#[test]
fn parse_strings() {
assert_eq!(from("\"\\n \\r \\t \\0 \\\"\""), Ok(Value::string("\n \r \t \0 \"")));
assert_eq!(from("\"Hello there 😀\""), Ok(Value::string("Hello there 😀")));
assert_eq!(from("\"Hello\\n\\t there\""), Ok(Value::string("Hello\n\t there")));
assert_eq!(from("\"Hello\\\\ there\""), Ok(Value::string("Hello\\ there")));
// An unknown escape code is reported on the code itself.
assert_eq!(
from("\"Hello\\p there\""),
Err(E(ParseStringError::ExpectedValidEscapeCode.between(7, 8)))
);
// Missing closing quote.
assert_eq!(
from("\"Hi"),
Err(E(ParseStringError::ExpectedClosingQuoteToMatch(0).at_one(3)))
);
}
#[test]
fn parse_unnamed_composites() {
assert_eq!(
from("( true, 1234 ,\t\n\t \"Hello!\" )"),
Ok(Value::unnamed_composite(vec![
Value::bool(true),
Value::u128(1234),
Value::string("Hello!")
]))
);
assert_eq!(from("()"), Ok(Value::unnamed_composite([])));
assert_eq!(from("(\n\n\t\t\n)"), Ok(Value::unnamed_composite([])));
}
#[test]
fn parse_named_composites() {
assert_eq!(
from(
"{
hello: true,
foo: 1234,
\"Hello there 😀\": \"Hello!\"
}"
),
Ok(Value::named_composite([
("hello", Value::bool(true)),
("foo", Value::u128(1234)),
("Hello there 😀", Value::string("Hello!"))
]))
);
}
#[test]
fn parse_variants() {
assert_eq!(
from(
"MyVariant {
hello: true,
foo: 1234,
\"Hello there 😀\": \"Hello!\"
}"
),
Ok(Value::named_variant(
"MyVariant",
[
("hello", Value::bool(true)),
("foo", Value::u128(1234)),
("Hello there 😀", Value::string("Hello!"))
]
))
);
assert_eq!(
from("Foo ( true, 1234 ,\t\n\t \"Hello!\" )"),
Ok(Value::unnamed_variant(
"Foo",
vec![Value::bool(true), Value::u128(1234), Value::string("Hello!")]
))
);
assert_eq!(from("Foo()"), Ok(Value::unnamed_variant("Foo", [])));
assert_eq!(from("Foo{}"), Ok(Value::named_variant::<_, String, _>("Foo", [])));
assert_eq!(from("Foo( \t)"), Ok(Value::unnamed_variant("Foo", [])));
assert_eq!(from("Foo{ }"), Ok(Value::named_variant::<_, String, _>("Foo", [])));
// Variant names may also be given as a `v`-prefixed string literal.
assert_eq!(
from("v\"variant name\" { }"),
Ok(Value::named_variant::<_, String, _>("variant name", []))
);
}
#[test]
fn parse_bit_sequences() {
use scale_bits::bits;
assert_eq!(
from("<011010110101101>"),
Ok(Value::bit_sequence(bits![0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1]))
);
assert_eq!(from("<01101>"), Ok(Value::bit_sequence(bits![0, 1, 1, 0, 1])));
assert_eq!(from("<0>"), Ok(Value::bit_sequence(bits![0])));
assert_eq!(from("<>"), Ok(Value::bit_sequence(bits![])));
}
#[test]
fn custom_parsers() {
// A custom parser that turns `0x`-prefixed hex with an even number of digits
// into a string value holding those digits.
let custom_parser = FromStrBuilder::new()
.add_custom_parser(|s| {
let mut toks = s.into_tokens();
// Not hex: decline so that other parsers get a chance.
if !toks.tokens("0x".chars()) {
return None;
}
let from = toks.location();
let num_hex_chars = toks.skip_tokens_while(|c| {
c.is_numeric()
|| ['a', 'b', 'c', 'd', 'e', 'f'].contains(&c.to_ascii_lowercase())
});
// An odd number of hex digits is an error spanning those digits.
if num_hex_chars % 2 != 0 {
let e = ParseErrorKind::custom("Wrong number hex")
.between(from.offset(), toks.offset());
return Some(Err(e));
}
let hex: String = toks.slice(from, toks.location()).as_iter().collect();
// Report how much input was consumed by advancing the caller's string.
*s = toks.remaining();
Some(Ok(Value::string(hex)))
});
// Each entry is (input, (expected result, expected unconsumed input)).
let expected = [
(
"(1, 0x1234, true)",
(
Ok(Value::unnamed_composite([
Value::u128(1),
Value::string("1234"),
Value::bool(true),
])),
"",
),
),
(
"0x12345zzz",
(Err(ParseErrorKind::custom("Wrong number hex").between(2, 7)), "0x12345zzz"),
),
// Error locations are relative to the whole input, not to the nested value.
(
"(true, 0x12345)",
(Err(ParseErrorKind::custom("Wrong number hex").between(9, 14)), ", 0x12345)"),
),
];
for (s, v) in expected {
let (expected_res, expected_leftover) = (v.0.map_err(E), v.1);
let (res, leftover) = custom_parser.parse(s);
assert_eq!(res.map_err(E), expected_res, "result isn't what we expected for: {s}");
assert_eq!(leftover, expected_leftover, "wrong number of leftover bytes for: {s}");
}
}
}