netidx_value/
parser.rs

1use crate::{pbuf::PBytes, Abstract, ValArray, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6    attempt, between, choice, from_str, many1, none_of, not_followed_by, one_of,
7    optional, parser,
8    parser::{
9        char::{alpha_num, digit, spaces, string},
10        combinator::recognize,
11        range::{take_while, take_while1},
12        repeat::escaped,
13    },
14    sep_by,
15    stream::{position, Range},
16    token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use netidx_core::pack::Pack;
21use poolshark::local::LPooled;
22use rust_decimal::Decimal;
23use std::{borrow::Cow, result::Result, str::FromStr, sync::LazyLock, time::Duration};
24use triomphe::Arc;
25
/// Generic escaping predicate: reports whether `c` is a control character.
///
/// Passed to `Escape::new` as the catch-all predicate when building `VAL_ESC`.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
29
30pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
31pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
32    Escape::new(
33        '\\',
34        &['\\', '"', '\n', '\r', '\0', '\t'],
35        &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
36        Some(should_escape_generic),
37    )
38    .unwrap()
39});
40
41pub fn escaped_string<I>(
42    must_esc: &'static [char],
43    esc: &Escape,
44) -> impl Parser<I, Output = String>
45where
46    I: RangeStream<Token = char>,
47    I::Error: ParseError<I::Token, I::Range, I::Position>,
48    I::Range: Range,
49{
50    recognize(escaped(
51        take_while1(move |c| !must_esc.contains(&c)),
52        esc.get_escape_char(),
53        one_of(
54            esc.get_tr()
55                .iter()
56                .filter_map(|(_, s)| s.chars().next())
57                .chain(must_esc.iter().copied()),
58        ),
59    ))
60    .map(|s| match esc.unescape(&s) {
61        Cow::Borrowed(_) => s, // it didn't need unescaping, so just return it
62        Cow::Owned(s) => s,
63    })
64}
65
66fn quoted<I>(
67    must_escape: &'static [char],
68    esc: &Escape,
69) -> impl Parser<I, Output = String>
70where
71    I: RangeStream<Token = char>,
72    I::Error: ParseError<I::Token, I::Range, I::Position>,
73    I::Range: Range,
74{
75    spaces().with(between(token('"'), token('"'), escaped_string(must_escape, esc)))
76}
77
78fn uint<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
79where
80    I: RangeStream<Token = char>,
81    I::Error: ParseError<I::Token, I::Range, I::Position>,
82    I::Range: Range,
83{
84    many1(digit()).then(|s: CompactString| match s.parse::<T>() {
85        Ok(i) => combine::value(i).right(),
86        Err(_) => unexpected_any("invalid unsigned integer").left(),
87    })
88}
89
90pub fn int<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
91where
92    I: RangeStream<Token = char>,
93    I::Error: ParseError<I::Token, I::Range, I::Position>,
94    I::Range: Range,
95{
96    recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
97        |s: CompactString| match s.parse::<T>() {
98            Ok(i) => combine::value(i).right(),
99            Err(_) => unexpected_any("invalid signed integer").left(),
100        },
101    )
102}
103
104fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
105where
106    I: RangeStream<Token = char>,
107    I::Error: ParseError<I::Token, I::Range, I::Position>,
108    I::Range: Range,
109{
110    choice((
111        attempt(recognize((
112            optional(token('-')),
113            take_while1(|c: char| c.is_digit(10)),
114            optional(token('.')),
115            take_while(|c: char| c.is_digit(10)),
116            token('e'),
117            optional(token('-')),
118            take_while1(|c: char| c.is_digit(10)),
119        ))),
120        attempt(recognize((
121            optional(token('-')),
122            take_while1(|c: char| c.is_digit(10)),
123            token('.'),
124            take_while(|c: char| c.is_digit(10)),
125        ))),
126    ))
127    .then(|s: CompactString| match s.parse::<T>() {
128        Ok(i) => combine::value(i).right(),
129        Err(_) => unexpected_any("invalid float").left(),
130    })
131}
132
133struct Base64Encoded(Vec<u8>);
134
135impl FromStr for Base64Encoded {
136    type Err = base64::DecodeError;
137
138    fn from_str(s: &str) -> Result<Self, Self::Err> {
139        BASE64.decode(s).map(Base64Encoded)
140    }
141}
142
143fn base64str<I>() -> impl Parser<I, Output = String>
144where
145    I: RangeStream<Token = char>,
146    I::Error: ParseError<I::Token, I::Range, I::Position>,
147    I::Range: Range,
148{
149    choice((
150        attempt(string("null")).map(|_| String::new()),
151        recognize((
152            take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
153            take_while(|c: char| c == '='),
154        )),
155    ))
156}
157
158fn constant<I>(typ: &'static str) -> impl Parser<I, Output = ()>
159where
160    I: RangeStream<Token = char>,
161    I::Error: ParseError<I::Token, I::Range, I::Position>,
162    I::Range: Range,
163{
164    string(typ).with(spaces()).with(token(':')).with(spaces()).map(|_| ())
165}
166
167pub fn close_expr<I>() -> impl Parser<I, Output = ()>
168where
169    I: RangeStream<Token = char>,
170    I::Error: ParseError<I::Token, I::Range, I::Position>,
171    I::Range: Range,
172{
173    not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
174}
175
176fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
177where
178    I: RangeStream<Token = char>,
179    I::Error: ParseError<I::Token, I::Range, I::Position>,
180    I::Range: Range,
181{
182    spaces().with(choice((
183        attempt(
184            between(
185                token('['),
186                token(']'),
187                sep_by(value(must_escape, esc), attempt(spaces().with(token(',')))),
188            )
189            .map(|mut vals: LPooled<Vec<Value>>| {
190                Value::Array(ValArray::from_iter_exact(vals.drain(..)))
191            }),
192        ),
193        attempt(between(
194            token('{'),
195            token('}'),
196            sep_by(
197                (
198                    value(must_escape, esc),
199                    spaces().with(string("=>")).with(value(must_escape, esc)),
200                ),
201                attempt(spaces().with(token(','))),
202            )
203            .map(|mut vals: LPooled<Vec<(Value, Value)>>| {
204                Value::Map(immutable_chunkmap::map::Map::from_iter(vals.drain(..)))
205            }),
206        )),
207        attempt(quoted(must_escape, esc)).map(|s| Value::String(ArcStr::from(s))),
208        attempt(flt::<_, f64>()).map(Value::F64),
209        attempt(int::<_, i64>()).map(Value::I64),
210        attempt(
211            string("true").skip(not_followed_by(alpha_num())).map(|_| Value::Bool(true)),
212        ),
213        attempt(
214            string("false")
215                .skip(not_followed_by(alpha_num()))
216                .map(|_| Value::Bool(false)),
217        ),
218        attempt(string("null").skip(not_followed_by(alpha_num())).map(|_| Value::Null)),
219        attempt(
220            constant("decimal")
221                .with(flt::<_, Decimal>())
222                .map(|d| Value::Decimal(Arc::new(d))),
223        ),
224        // Integer and float types grouped to stay under choice! limit
225        attempt(choice((
226            attempt(constant("u8").with(uint::<_, u8>()).map(Value::U8)),
227            attempt(constant("i8").with(int::<_, i8>()).map(Value::I8)),
228            attempt(constant("u16").with(uint::<_, u16>()).map(Value::U16)),
229            attempt(constant("i16").with(int::<_, i16>()).map(Value::I16)),
230            attempt(constant("u32").with(uint::<_, u32>()).map(Value::U32)),
231            attempt(constant("v32").with(uint::<_, u32>()).map(Value::V32)),
232            attempt(constant("i32").with(int::<_, i32>()).map(Value::I32)),
233            attempt(constant("z32").with(int::<_, i32>()).map(Value::Z32)),
234            attempt(constant("u64").with(uint::<_, u64>()).map(Value::U64)),
235            attempt(constant("v64").with(uint::<_, u64>()).map(Value::V64)),
236            attempt(constant("i64").with(int::<_, i64>()).map(Value::I64)),
237            attempt(constant("z64").with(int::<_, i64>()).map(Value::Z64)),
238            attempt(constant("f32").with(flt::<_, f32>()).map(Value::F32)),
239            attempt(constant("f64").with(flt::<_, f64>()).map(Value::F64)),
240        ))),
241        attempt(
242            constant("bytes")
243                .with(from_str(base64str()))
244                .map(|Base64Encoded(v)| Value::Bytes(PBytes::new(Bytes::from(v)))),
245        ),
246        attempt(constant("abstract").with(from_str(base64str())).then(
247            |Base64Encoded(v)| match Abstract::decode(&mut &v[..]) {
248                Ok(a) => combine::value(Value::Abstract(a)).right(),
249                Err(_) => unexpected_any("failed to unpack abstract").left(),
250            },
251        )),
252        attempt(
253            constant("error")
254                .with(value(must_escape, esc))
255                .map(|v| Value::Error(Arc::new(v))),
256        ),
257        attempt(
258            constant("datetime")
259                .with(from_str(quoted(must_escape, esc)))
260                .map(|d| Value::DateTime(Arc::new(d))),
261        ),
262        attempt(
263            constant("duration")
264                .with(flt::<_, f64>().and(choice((
265                    string("ns"),
266                    string("us"),
267                    string("ms"),
268                    string("s"),
269                ))))
270                .map(|(n, suffix)| {
271                    let d = match suffix {
272                        "ns" => Duration::from_secs_f64(n / 1e9),
273                        "us" => Duration::from_secs_f64(n / 1e6),
274                        "ms" => Duration::from_secs_f64(n / 1e3),
275                        "s" => Duration::from_secs_f64(n),
276                        _ => unreachable!(),
277                    };
278                    Value::Duration(Arc::new(d))
279                }),
280        ),
281    )))
282}
283
284parser! {
285    pub fn value['a, I](
286        must_escape: &'static [char],
287        esc: &'a Escape
288    )(I) -> Value
289    where [I: RangeStream<Token = char>, I::Range: Range]
290    {
291        value_(must_escape, esc)
292    }
293}
294
295pub fn parse_value(s: &str) -> anyhow::Result<Value> {
296    value(&VAL_MUST_ESC, &VAL_ESC)
297        .easy_parse(position::Stream::new(s))
298        .map(|(r, _)| r)
299        .map_err(|e| anyhow::anyhow!(format!("{}", e)))
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use arcstr::literal;

    /// Assert that `input` parses successfully to exactly `expected`.
    #[track_caller]
    fn check(expected: Value, input: &str) {
        assert_eq!(expected, parse_value(input).unwrap());
    }

    #[test]
    fn parse() {
        // Tagged and untagged integers.
        check(Value::U32(23), "u32:23");
        check(Value::V32(42), "v32:42");
        check(Value::I32(-10), "i32:-10");
        check(Value::I32(12321), "i32:12321");
        check(Value::Z32(-99), "z32:-99");
        check(Value::U64(100), "u64:100");
        check(Value::V64(100), "v64:100");
        check(Value::I64(-100), "i64:-100");
        check(Value::I64(-100), "-100");
        check(Value::I64(100), "i64:100");
        check(Value::I64(100), "100");
        check(Value::Z64(-100), "z64:-100");
        check(Value::Z64(100), "z64:100");

        // Tagged and untagged floats.
        check(Value::F32(3.1415), "f32:3.1415");
        check(Value::F32(675.6), "f32:675.6");
        check(Value::F32(42.3435), "f32:42.3435");
        check(Value::F32(1.123e9), "f32:1.123e9");
        check(Value::F32(1e9), "f32:1e9");
        check(Value::F32(21.2443e-6), "f32:21.2443e-6");
        check(Value::F32(3.), "f32:3.");
        check(Value::F64(3.1415), "f64:3.1415");
        check(Value::F64(3.1415), "3.1415");
        check(Value::F64(1.123e9), "1.123e9");
        check(Value::F64(1e9), "1e9");
        check(Value::F64(21.2443e-6), "21.2443e-6");
        check(Value::F64(3.), "f64:3.");
        check(Value::F64(3.), "3.");

        // Quoted strings, including escaped quotes and the empty string.
        let coconuts = ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#);
        check(Value::String(coconuts), r#""I've got a lovely \"bunch\" of (coconuts)""#);
        check(Value::String(ArcStr::new()), r#""""#);
        check(Value::String(ArcStr::from(r#"""#)), r#""\"""#);

        // Keywords.
        check(Value::Bool(true), "true");
        check(Value::Bool(true), "true ");
        check(Value::Bool(false), "false");
        check(Value::Null, "null");

        // Error wrapping a nested value.
        check(Value::error(literal!("error")), r#"error:"error""#);
    }
}