use crate::{pbuf::PBytes, Abstract, ValArray, Value};
use arcstr::ArcStr;
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
use bytes::Bytes;
use combine::{
attempt, between, choice, eof, from_str, look_ahead, many1, none_of, not_followed_by,
one_of, optional, parser, satisfy,
parser::{
char::{alpha_num, digit, spaces, string},
combinator::recognize,
range::{take_while, take_while1},
repeat::escaped,
},
sep_by, sep_by1,
stream::{position, Range},
token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
};
use compact_str::CompactString;
use escaping::Escape;
use netidx_core::pack::Pack;
use poolshark::local::LPooled;
use rust_decimal::Decimal;
use std::{borrow::Cow, str::FromStr, sync::LazyLock, time::Duration};
use triomphe::Arc;
/// Parse a `sep`-separated list of `p` that may end with a trailing
/// separator before `term`.
///
/// Each element slot first peeks for `term` (without consuming it). When the
/// terminator is next, the slot is recorded as `None` and later discarded, so
/// `a, b, <term>` yields the same collection as `a, b <term>`. The terminator
/// itself is left in the input for the caller.
///
/// The peeked terminator counts as an element for the underlying `sep_by1`,
/// so input positioned directly at `term` yields an empty collection.
pub fn sep_by1_tok<I, O, OC, EP, SP, TP>(
    p: EP,
    sep: SP,
    term: TP,
) -> impl Parser<I, Output = OC>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
    OC: Extend<O> + Default,
    SP: Parser<I>,
    EP: Parser<I, Output = O>,
    TP: Parser<I>,
{
    // Either "the terminator is next" (no element) or a real element.
    let slot = choice((look_ahead(term).map(|_| None::<O>), p.map(Some)));
    sep_by1(slot, sep).map(|mut slots: LPooled<Vec<Option<O>>>| {
        let mut out = OC::default();
        out.extend(slots.drain(..).flatten());
        out
    })
}
/// Parse a possibly empty `sep`-separated list of `p` that may end with a
/// trailing separator before `term`.
///
/// Each element slot first peeks for `term` (without consuming it). When the
/// terminator is next, the slot is recorded as `None` and later discarded, so
/// `a, b, <term>` yields the same collection as `a, b <term>`. The terminator
/// itself is left in the input for the caller.
pub fn sep_by_tok<I, O, OC, EP, SP, TP>(
    p: EP,
    sep: SP,
    term: TP,
) -> impl Parser<I, Output = OC>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
    OC: Extend<O> + Default,
    SP: Parser<I>,
    EP: Parser<I, Output = O>,
    TP: Parser<I>,
{
    // Either "the terminator is next" (no element) or a real element.
    let slot = choice((look_ahead(term).map(|_| None::<O>), p.map(Some)));
    sep_by(slot, sep).map(|mut slots: LPooled<Vec<Option<O>>>| {
        let mut out = OC::default();
        out.extend(slots.drain(..).flatten());
        out
    })
}
/// Succeed, consuming nothing, only when the next character is neither `_`
/// nor alphanumeric.
///
/// Used after keywords such as `true` so that they are not matched as the
/// prefix of a longer identifier-like word.
pub fn not_prefix<I>() -> impl Parser<I, Output = ()>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let word_char = choice((token('_'), alpha_num()));
    not_followed_by(word_char)
}
/// Skip leading whitespace, then match the single character `t` and yield it.
fn sptoken<I>(t: char) -> impl Parser<I, Output = char>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let leading_ws = spaces();
    leading_ws.with(token(t))
}
/// Skip leading whitespace, then match the literal string `t` and yield it.
fn spstring<I>(t: &'static str) -> impl Parser<I, Output = &'static str>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let leading_ws = spaces();
    leading_ws.with(string(t))
}
fn csep<I>() -> impl Parser<I, Output = char>
where
I: RangeStream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
I::Range: Range,
{
attempt(spaces().with(token(','))).skip(spaces())
}
/// Generic escaping predicate handed to `VAL_ESC`: true exactly for control
/// characters.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
/// Characters that may never appear unescaped inside a quoted value string:
/// the backslash (the escape character) and the double quote (the string
/// delimiter). `parse_value` passes this as the `must_escape` set.
pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
/// Lazily built escaping configuration used for value string literals.
///
/// Backslash is the escape character. The second argument lists backslash,
/// double quote, newline, carriage return, NUL and tab; the third maps
/// newline, carriage return, tab and NUL to the letters `n`, `r`, `t` and
/// `0`. `should_escape_generic` (control characters) is supplied as the
/// optional predicate.
// NOTE(review): the exact meaning of each `Escape::new` argument is defined by
// the `escaping` crate and is not visible here — confirm against its docs.
pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
Escape::new(
'\\',
&['\\', '"', '\n', '\r', '\0', '\t'],
&[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
Some(should_escape_generic),
)
// The arguments are compile-time constants, so a failure here would be a
// programming error rather than a runtime condition.
.unwrap()
});
/// Parse the body of an escaped string (without surrounding delimiters) and
/// return it unescaped.
///
/// Plain runs are any characters not in `must_esc`. An escape sequence is
/// `esc`'s escape character followed by either the first character of one of
/// `esc`'s translation targets or one of the `must_esc` characters. The
/// recognised text is then passed through `esc.unescape`.
pub fn escaped_string<I>(
    must_esc: &'static [char],
    esc: &Escape,
) -> impl Parser<I, Output = String>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let escape_char = esc.get_escape_char();
    // Characters allowed directly after the escape character.
    let escapable = esc
        .get_tr()
        .iter()
        .filter_map(|(_, s)| s.chars().next())
        .chain(must_esc.iter().copied());
    let plain_run = take_while1(move |c| !must_esc.contains(&c));
    recognize(escaped(plain_run, escape_char, one_of(escapable))).map(|raw: String| {
        match esc.unescape(&raw) {
            // Unescaping produced a new string.
            Cow::Owned(unescaped) => unescaped,
            // Nothing needed unescaping; reuse the recognised text as is.
            Cow::Borrowed(_) => raw,
        }
    })
}
/// Parse a double-quoted string literal and return its unescaped contents.
///
/// The body between the quotes is handled by [`escaped_string`].
fn quoted<I>(
    must_escape: &'static [char],
    esc: &Escape,
) -> impl Parser<I, Output = String>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let body = escaped_string(must_escape, esc);
    token('"').with(body).skip(token('"'))
}
/// Integer types that can be parsed from a digit string in a given radix.
///
/// The primitive integers each have an inherent `from_str_radix`, but no
/// shared trait exposes it; this trait lets the integer parsers be generic
/// over the target type.
pub trait FromStrRadix: Sized {
    /// Parse `s` (optionally signed) as an integer written in base `radix`.
    fn from_str_radix(s: &str, radix: u32) -> Result<Self, std::num::ParseIntError>;
}

/// Implements [`FromStrRadix`] for each listed primitive integer type by
/// forwarding to that type's inherent `from_str_radix`.
macro_rules! impl_from_str_radix {
    ($($int:ty),*) => {
        $(
            impl FromStrRadix for $int {
                fn from_str_radix(
                    digits: &str,
                    base: u32,
                ) -> Result<Self, std::num::ParseIntError> {
                    // `<$int>::` resolves to the inherent method, not this trait method.
                    <$int>::from_str_radix(digits, base)
                }
            }
        )*
    };
}

impl_from_str_radix!(u8, i8, u16, i16, u32, i32, u64, i64, usize, isize);
/// Parse a radix-prefixed digit string: `0x`/`0X` (hex), `0b`/`0B` (binary)
/// or `0o`/`0O` (octal), each followed by at least one digit of that base.
///
/// Yields the radix together with the digits (prefix stripped). Every
/// alternative is wrapped in `attempt`, so a failed match consumes nothing.
fn radix_prefix<I>() -> impl Parser<I, Output = (u32, CompactString)>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let hex = token('0')
        .with(one_of(['x', 'X']))
        .with(many1(satisfy(|c: char| c.is_ascii_hexdigit())))
        .map(|digits: CompactString| (16u32, digits));
    let bin = token('0')
        .with(one_of(['b', 'B']))
        .with(many1(satisfy(|c: char| matches!(c, '0' | '1'))))
        .map(|digits: CompactString| (2u32, digits));
    let oct = token('0')
        .with(one_of(['o', 'O']))
        .with(many1(satisfy(|c: char| c.is_digit(8))))
        .map(|digits: CompactString| (8u32, digits));
    choice((attempt(hex), attempt(bin), attempt(oct)))
}
/// Parse an unsigned integer literal into `T`.
///
/// Accepts either a radix-prefixed form (see [`radix_prefix`]) or a run of
/// decimal digits. If the digits do not fit in `T` the parse fails with
/// "invalid unsigned integer".
fn uint<I, T: FromStrRadix + Clone + Copy>() -> impl Parser<I, Output = T>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let decimal = many1(digit()).map(|digits: CompactString| (10u32, digits));
    choice((radix_prefix(), decimal)).then(|(radix, digits): (u32, CompactString)| {
        match T::from_str_radix(&digits, radix) {
            Err(_) => unexpected_any("invalid unsigned integer").left(),
            Ok(n) => combine::value(n).right(),
        }
    })
}
/// Parse a signed integer literal into `T`.
///
/// Two forms are accepted, each with an optional leading `-`:
/// a radix-prefixed literal (see [`radix_prefix`]) such as `-0x1F`, or a
/// plain run of decimal digits. If the text does not fit in `T` the parse
/// fails with "invalid signed integer".
pub fn int<I, T: FromStrRadix + Clone + Copy>() -> impl Parser<I, Output = T>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let prefixed = attempt(optional(token('-')).and(radix_prefix())).then(
        |(sign, (radix, digits)): (Option<char>, (u32, CompactString))| {
            // The prefix was stripped by `radix_prefix`; put the sign back in
            // front of the bare digits so `from_str_radix` sees e.g. "-1F".
            let text = match sign {
                Some(_) => {
                    let mut negated = CompactString::new("-");
                    negated.push_str(&digits);
                    negated
                }
                None => digits,
            };
            match T::from_str_radix(&text, radix) {
                Err(_) => unexpected_any("invalid signed integer").left(),
                Ok(n) => combine::value(n).right(),
            }
        },
    );
    let decimal = recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10))))
        .then(|text: CompactString| match T::from_str_radix(&text, 10) {
            Err(_) => unexpected_any("invalid signed integer").left(),
            Ok(n) => combine::value(n).right(),
        });
    choice((prefixed, decimal))
}
/// Parse a floating point literal into `T` via `T`'s `FromStr`.
///
/// Two shapes are recognised, each with an optional leading `-`:
/// - scientific: digits, optional `.` and fraction digits, a lowercase `e`,
///   an optional `-`, and exponent digits (`1e9`, `21.2443e-6`);
/// - fixed: digits, a mandatory `.`, and optional fraction digits (`3.`,
///   `3.1415`).
///
/// A bare integer such as `3` is not matched. Both shapes are wrapped in
/// `attempt`, so a non-match consumes nothing. If `T` rejects the recognised
/// text the parse fails with "invalid float".
fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let scientific = recognize((
        optional(token('-')),
        take_while1(|c: char| c.is_digit(10)),
        optional(token('.')),
        take_while(|c: char| c.is_digit(10)),
        token('e'),
        optional(token('-')),
        take_while1(|c: char| c.is_digit(10)),
    ));
    let fixed = recognize((
        optional(token('-')),
        take_while1(|c: char| c.is_digit(10)),
        token('.'),
        take_while(|c: char| c.is_digit(10)),
    ));
    choice((attempt(scientific), attempt(fixed))).then(|text: CompactString| {
        match T::from_str(&text) {
            Err(_) => unexpected_any("invalid float").left(),
            Ok(x) => combine::value(x).right(),
        }
    })
}
/// Parse standard-alphabet base64 text and decode it into a pooled byte
/// buffer.
///
/// The recognised text is a (possibly empty) run of ASCII alphanumerics,
/// `+` and `/`, followed by any number of `=` padding characters. Decoding
/// errors fail the parse with "base64 decode failed".
fn base64<I>() -> impl Parser<I, Output = LPooled<Vec<u8>>>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let encoded = recognize((
        take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
        take_while(|c: char| c == '='),
    ));
    encoded.then(|text: LPooled<String>| {
        // A bare "==" is swapped for a fresh pooled string before decoding.
        // NOTE(review): presumably "==" is how empty byte strings are
        // printed — confirm against the formatter.
        let text = if &*text == "==" { LPooled::take() } else { text };
        let mut decoded: LPooled<Vec<u8>> = LPooled::take();
        match BASE64.decode_vec(&*text, &mut decoded) {
            Err(_) => unexpected_any("base64 decode failed").left(),
            Ok(()) => combine::value(decoded).right(),
        }
    })
}
/// Match a type tag such as `u32:`: the literal `typ`, then a colon, with
/// optional whitespace on either side of the colon. Yields `()`.
fn constant<I>(typ: &'static str) -> impl Parser<I, Output = ()>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    let tag = string(typ);
    let colon = token(':');
    tag.with(spaces()).with(colon).with(spaces()).map(|_| ())
}
/// Succeed, consuming nothing, when the next character is one of the
/// expression terminators (space, newline, tab, `;`, `)`, `,`, `]`, `}`,
/// `"`); fail if any other character follows.
pub fn close_expr<I>() -> impl Parser<I, Output = ()>
where
    I: RangeStream<Token = char>,
    I::Error: ParseError<I::Token, I::Range, I::Position>,
    I::Range: Range,
{
    const TERMINATORS: [char; 9] = [' ', '\n', '\t', ';', ')', ',', ']', '}', '"'];
    let non_terminator = none_of(TERMINATORS);
    not_followed_by(non_terminator)
}
fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
where
I: RangeStream<Token = char>,
I::Error: ParseError<I::Token, I::Range, I::Position>,
I::Range: Range,
{
spaces().with(choice((
choice((
attempt(constant("u8")).with(uint::<_, u8>().map(Value::U8)),
attempt(constant("u16")).with(uint::<_, u16>().map(Value::U16)),
attempt(constant("u32")).with(uint::<_, u32>().map(Value::U32)),
constant("u64").with(uint::<_, u64>().map(Value::U64)),
attempt(constant("i8")).with(int::<_, i8>().map(Value::I8)),
attempt(constant("i16")).with(int::<_, i16>().map(Value::I16)),
attempt(constant("i32")).with(int::<_, i32>().map(Value::I32)),
constant("i64").with(int::<_, i64>().map(Value::I64)),
attempt(constant("v32")).with(uint::<_, u32>().map(Value::V32)),
constant("v64").with(uint::<_, u64>().map(Value::V64)),
attempt(constant("z32")).with(int::<_, i32>().map(Value::Z32)),
constant("z64").with(int::<_, i64>().map(Value::Z64)),
attempt(constant("f32")).with(flt::<_, f32>().map(Value::F32)),
attempt(constant("f64")).with(flt::<_, f64>().map(Value::F64)),
)),
between(
token('['),
sptoken(']'),
sep_by_tok(value(must_escape, esc), csep(), token(']')),
)
.map(|mut vals: LPooled<Vec<Value>>| {
Value::Array(ValArray::from_iter_exact(vals.drain(..)))
}),
between(
token('{'),
sptoken('}'),
sep_by_tok(
(value(must_escape, esc), spstring("=>").with(value(must_escape, esc))),
csep(),
token('}'),
)
.map(|mut vals: LPooled<Vec<(Value, Value)>>| {
Value::Map(immutable_chunkmap::map::Map::from_iter(vals.drain(..)))
}),
),
quoted(must_escape, esc).map(|s| Value::String(ArcStr::from(s))),
flt::<_, f64>().map(Value::F64),
int::<_, i64>().map(Value::I64),
attempt(string("true").skip(not_prefix())).map(|_| Value::Bool(true)),
attempt(string("false").skip(not_prefix())).map(|_| Value::Bool(false)),
attempt(string("null").skip(not_prefix())).map(|_| Value::Null),
constant("bytes")
.with(base64())
.map(|v| Value::Bytes(PBytes::new(Bytes::from(LPooled::detach(v))))),
constant("abstract").with(base64()).then(|v| {
match Abstract::decode(&mut &v[..]) {
Ok(a) => combine::value(Value::Abstract(a)).right(),
Err(_) => unexpected_any("failed to unpack abstract").left(),
}
}),
constant("error")
.with(value(must_escape, esc))
.map(|v| Value::Error(Arc::new(v))),
attempt(constant("decimal"))
.with(flt::<_, Decimal>())
.map(|d| Value::Decimal(Arc::new(d))),
attempt(constant("datetime"))
.with(from_str(quoted(must_escape, esc)))
.map(|d| Value::DateTime(Arc::new(d))),
constant("duration")
.with(flt::<_, f64>().and(choice((
string("ns"),
string("us"),
string("ms"),
string("s"),
))))
.map(|(n, suffix)| {
let d = match suffix {
"ns" => Duration::from_secs_f64(n / 1e9),
"us" => Duration::from_secs_f64(n / 1e6),
"ms" => Duration::from_secs_f64(n / 1e3),
"s" => Duration::from_secs_f64(n),
_ => unreachable!(),
};
Value::Duration(Arc::new(d))
}),
)))
}
parser! {
/// Parser for a single `Value` literal; a thin wrapper around `value_`.
///
/// `value_` calls back into this parser for nested arrays, maps and error
/// payloads, so this is the recursion point of the grammar.
///
/// `must_escape` lists the characters that must be escaped inside quoted
/// strings and `esc` is the escaping configuration used to unescape them.
pub fn value['a, I](
must_escape: &'static [char],
esc: &'a Escape
)(I) -> Value
where [I: RangeStream<Token = char>, I::Range: Range]
{
value_(must_escape, esc)
}
}
/// Parse `s` as exactly one `Value` literal.
///
/// Uses the default escaping rules ([`VAL_MUST_ESC`], [`VAL_ESC`]). Trailing
/// whitespace is allowed, but any other trailing input is an error.
///
/// # Errors
///
/// Returns the parser's error, rendered as text, if `s` is not a valid value.
pub fn parse_value(s: &str) -> anyhow::Result<Value> {
    let mut whole_input = value(&VAL_MUST_ESC, &VAL_ESC).skip(spaces()).skip(eof());
    match whole_input.easy_parse(position::Stream::new(s)) {
        Ok((parsed, _rest)) => Ok(parsed),
        // The error borrows from `s`, so it is rendered to an owned message.
        Err(e) => Err(anyhow::anyhow!("{}", e)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Map;
    use arcstr::literal;

    /// Parse `src`, panicking at the caller's location if it is rejected.
    #[track_caller]
    fn parsed(src: &str) -> Value {
        parse_value(src).unwrap()
    }

    #[test]
    fn parse() {
        // Integers, tagged and untagged.
        assert_eq!(Value::U32(23), parsed("u32:23"));
        assert_eq!(Value::V32(42), parsed("v32:42"));
        assert_eq!(Value::I32(-10), parsed("i32:-10"));
        assert_eq!(Value::I32(12321), parsed("i32:12321"));
        assert_eq!(Value::Z32(-99), parsed("z32:-99"));
        assert_eq!(Value::U64(100), parsed("u64:100"));
        assert_eq!(Value::V64(100), parsed("v64:100"));
        assert_eq!(Value::I64(-100), parsed("i64:-100"));
        assert_eq!(Value::I64(-100), parsed("-100"));
        assert_eq!(Value::I64(100), parsed("i64:100"));
        assert_eq!(Value::I64(100), parsed("100"));
        assert_eq!(Value::Z64(-100), parsed("z64:-100"));
        assert_eq!(Value::Z64(100), parsed("z64:100"));

        // Floats, tagged and untagged, fixed and scientific.
        assert_eq!(Value::F32(3.1415), parsed("f32:3.1415"));
        assert_eq!(Value::F32(675.6), parsed("f32:675.6"));
        assert_eq!(Value::F32(42.3435), parsed("f32:42.3435"));
        assert_eq!(Value::F32(1.123e9), parsed("f32:1.123e9"));
        assert_eq!(Value::F32(1e9), parsed("f32:1e9"));
        assert_eq!(Value::F32(21.2443e-6), parsed("f32:21.2443e-6"));
        assert_eq!(Value::F32(3.), parsed("f32:3."));
        assert_eq!(Value::F64(3.1415), parsed("f64:3.1415"));
        assert_eq!(Value::F64(3.1415), parsed("3.1415"));
        assert_eq!(Value::F64(1.123e9), parsed("1.123e9"));
        assert_eq!(Value::F64(1e9), parsed("1e9"));
        assert_eq!(Value::F64(21.2443e-6), parsed("21.2443e-6"));
        assert_eq!(Value::F64(3.), parsed("f64:3."));
        assert_eq!(Value::F64(3.), parsed("3."));

        // Strings: escaped quotes, the empty string, a lone escaped quote.
        let expected = ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#);
        let src = r#""I've got a lovely \"bunch\" of (coconuts)""#;
        assert_eq!(Value::String(expected), parsed(src));
        let expected = ArcStr::new();
        assert_eq!(Value::String(expected), parsed(r#""""#));
        let expected = ArcStr::from(r#"""#);
        let src = r#""\"""#;
        assert_eq!(Value::String(expected), parsed(src));

        // Keywords.
        assert_eq!(Value::Bool(true), parsed("true"));
        assert_eq!(Value::Bool(true), parsed("true "));
        assert_eq!(Value::Bool(false), parsed("false"));
        assert_eq!(Value::Null, parsed("null"));

        // Error wrapping a nested value.
        assert_eq!(Value::error(literal!("error")), parsed(r#"error:"error""#));

        // Arrays, with and without a trailing comma.
        let arr = ValArray::from_iter_exact(
            [Value::I64(42), Value::String(literal!("hello world"))].into_iter(),
        );
        assert_eq!(Value::Array(arr.clone()), parsed(r#"[42, "hello world", ]"#));
        assert_eq!(Value::Array(arr), parsed(r#"[42, "hello world"]"#));

        // Maps, with and without a trailing comma.
        let map = Map::from_iter([
            (Value::I64(42), Value::String(literal!("hello world"))),
            (Value::String(literal!("hello world")), Value::I64(42)),
        ]);
        assert_eq!(
            Value::Map(map.clone()),
            parsed(r#"{ 42 => "hello world", "hello world" => 42, }"#)
        );
        assert_eq!(
            Value::Map(map),
            parsed(r#"{ 42 => "hello world", "hello world" => 42}"#)
        );

        // Radix-prefixed integers: hex, binary, octal; tagged and untagged.
        assert_eq!(Value::U8(255), parsed("u8:0xFF"));
        assert_eq!(Value::U8(255), parsed("u8:0XFF"));
        assert_eq!(Value::I32(-31), parsed("i32:-0x1F"));
        assert_eq!(Value::U64(0xDEAD), parsed("u64:0xDEAD"));
        assert_eq!(Value::I64(255), parsed("i64:0xFF"));
        assert_eq!(Value::U16(10), parsed("u16:0b1010"));
        assert_eq!(Value::U16(10), parsed("u16:0B1010"));
        assert_eq!(Value::I8(-1), parsed("i8:-0b1"));
        assert_eq!(Value::U32(63), parsed("u32:0o77"));
        assert_eq!(Value::U32(63), parsed("u32:0O77"));
        assert_eq!(Value::I64(-8), parsed("i64:-0o10"));
        assert_eq!(Value::I64(255), parsed("0xFF"));
        assert_eq!(Value::I64(10), parsed("0b1010"));
        assert_eq!(Value::I64(63), parsed("0o77"));
        assert_eq!(Value::I64(-255), parsed("-0xFF"));
    }
}