netidx_value/
parser.rs

1use crate::{pbuf::PBytes, Abstract, ValArray, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6    attempt, between, choice, eof, from_str, look_ahead, many1, none_of, not_followed_by,
7    one_of, optional, parser,
8    parser::{
9        char::{digit, spaces, string},
10        combinator::recognize,
11        range::{take_while, take_while1},
12        repeat::escaped,
13    },
14    sep_by, sep_by1,
15    stream::{position, Range},
16    token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use netidx_core::pack::Pack;
21use poolshark::local::LPooled;
22use rust_decimal::Decimal;
23use std::{borrow::Cow, result::Result, str::FromStr, sync::LazyLock, time::Duration};
24use triomphe::Arc;
25
26// sep_by1, but a separator terminator is allowed, and ignored
27pub fn sep_by1_tok<I, O, OC, EP, SP, TP>(
28    p: EP,
29    sep: SP,
30    term: TP,
31) -> impl Parser<I, Output = OC>
32where
33    I: RangeStream<Token = char>,
34    I::Error: ParseError<I::Token, I::Range, I::Position>,
35    I::Range: Range,
36    OC: Extend<O> + Default,
37    SP: Parser<I>,
38    EP: Parser<I, Output = O>,
39    TP: Parser<I>,
40{
41    sep_by1(choice((look_ahead(term).map(|_| None::<O>), p.map(Some))), sep).map(
42        |mut e: LPooled<Vec<Option<O>>>| {
43            let mut res = OC::default();
44            res.extend(e.drain(..).filter_map(|e| e));
45            res
46        },
47    )
48}
49
50// sep_by, but a separator terminator is allowed, and ignored
51pub fn sep_by_tok<I, O, OC, EP, SP, TP>(
52    p: EP,
53    sep: SP,
54    term: TP,
55) -> impl Parser<I, Output = OC>
56where
57    I: RangeStream<Token = char>,
58    I::Error: ParseError<I::Token, I::Range, I::Position>,
59    I::Range: Range,
60    OC: Extend<O> + Default,
61    SP: Parser<I>,
62    EP: Parser<I, Output = O>,
63    TP: Parser<I>,
64{
65    sep_by(choice((look_ahead(term).map(|_| None::<O>), p.map(Some))), sep).map(
66        |mut e: LPooled<Vec<Option<O>>>| {
67            let mut res = OC::default();
68            res.extend(e.drain(..).filter_map(|e| e));
69            res
70        },
71    )
72}
73
74fn sptoken<I>(t: char) -> impl Parser<I, Output = char>
75where
76    I: RangeStream<Token = char>,
77    I::Error: ParseError<I::Token, I::Range, I::Position>,
78    I::Range: Range,
79{
80    spaces().with(token(t))
81}
82
83fn spstring<I>(t: &'static str) -> impl Parser<I, Output = &'static str>
84where
85    I: RangeStream<Token = char>,
86    I::Error: ParseError<I::Token, I::Range, I::Position>,
87    I::Range: Range,
88{
89    spaces().with(string(t))
90}
91
92fn csep<I>() -> impl Parser<I, Output = char>
93where
94    I: RangeStream<Token = char>,
95    I::Error: ParseError<I::Token, I::Range, I::Position>,
96    I::Range: Range,
97{
98    attempt(spaces().with(token(','))).skip(spaces())
99}
100
/// Predicate handed to `VAL_ESC`: true exactly when `c` is a control character.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
104
105pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
106pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
107    Escape::new(
108        '\\',
109        &['\\', '"', '\n', '\r', '\0', '\t'],
110        &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
111        Some(should_escape_generic),
112    )
113    .unwrap()
114});
115
116pub fn escaped_string<I>(
117    must_esc: &'static [char],
118    esc: &Escape,
119) -> impl Parser<I, Output = String>
120where
121    I: RangeStream<Token = char>,
122    I::Error: ParseError<I::Token, I::Range, I::Position>,
123    I::Range: Range,
124{
125    recognize(escaped(
126        take_while1(move |c| !must_esc.contains(&c)),
127        esc.get_escape_char(),
128        one_of(
129            esc.get_tr()
130                .iter()
131                .filter_map(|(_, s)| s.chars().next())
132                .chain(must_esc.iter().copied()),
133        ),
134    ))
135    .map(|s| match esc.unescape(&s) {
136        Cow::Borrowed(_) => s, // it didn't need unescaping, so just return it
137        Cow::Owned(s) => s,
138    })
139}
140
141fn quoted<I>(
142    must_escape: &'static [char],
143    esc: &Escape,
144) -> impl Parser<I, Output = String>
145where
146    I: RangeStream<Token = char>,
147    I::Error: ParseError<I::Token, I::Range, I::Position>,
148    I::Range: Range,
149{
150    between(token('"'), token('"'), escaped_string(must_escape, esc))
151}
152
153fn uint<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
154where
155    I: RangeStream<Token = char>,
156    I::Error: ParseError<I::Token, I::Range, I::Position>,
157    I::Range: Range,
158{
159    many1(digit()).then(|s: CompactString| match s.parse::<T>() {
160        Ok(i) => combine::value(i).right(),
161        Err(_) => unexpected_any("invalid unsigned integer").left(),
162    })
163}
164
165pub fn int<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
166where
167    I: RangeStream<Token = char>,
168    I::Error: ParseError<I::Token, I::Range, I::Position>,
169    I::Range: Range,
170{
171    recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
172        |s: CompactString| match s.parse::<T>() {
173            Ok(i) => combine::value(i).right(),
174            Err(_) => unexpected_any("invalid signed integer").left(),
175        },
176    )
177}
178
179fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
180where
181    I: RangeStream<Token = char>,
182    I::Error: ParseError<I::Token, I::Range, I::Position>,
183    I::Range: Range,
184{
185    choice((
186        attempt(recognize((
187            optional(token('-')),
188            take_while1(|c: char| c.is_digit(10)),
189            optional(token('.')),
190            take_while(|c: char| c.is_digit(10)),
191            token('e'),
192            optional(token('-')),
193            take_while1(|c: char| c.is_digit(10)),
194        ))),
195        attempt(recognize((
196            optional(token('-')),
197            take_while1(|c: char| c.is_digit(10)),
198            token('.'),
199            take_while(|c: char| c.is_digit(10)),
200        ))),
201    ))
202    .then(|s: CompactString| match s.parse::<T>() {
203        Ok(i) => combine::value(i).right(),
204        Err(_) => unexpected_any("invalid float").left(),
205    })
206}
207
208struct Base64Encoded(Vec<u8>);
209
210impl FromStr for Base64Encoded {
211    type Err = base64::DecodeError;
212
213    fn from_str(s: &str) -> Result<Self, Self::Err> {
214        BASE64.decode(s).map(Base64Encoded)
215    }
216}
217
218fn base64str<I>() -> impl Parser<I, Output = String>
219where
220    I: RangeStream<Token = char>,
221    I::Error: ParseError<I::Token, I::Range, I::Position>,
222    I::Range: Range,
223{
224    choice((
225        attempt(string("null")).map(|_| String::new()),
226        recognize((
227            take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
228            take_while(|c: char| c == '='),
229        )),
230    ))
231}
232
233fn constant<I>(typ: &'static str) -> impl Parser<I, Output = ()>
234where
235    I: RangeStream<Token = char>,
236    I::Error: ParseError<I::Token, I::Range, I::Position>,
237    I::Range: Range,
238{
239    string(typ).with(spaces()).with(token(':')).with(spaces()).map(|_| ())
240}
241
242pub fn close_expr<I>() -> impl Parser<I, Output = ()>
243where
244    I: RangeStream<Token = char>,
245    I::Error: ParseError<I::Token, I::Range, I::Position>,
246    I::Range: Range,
247{
248    not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
249}
250
251fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
252where
253    I: RangeStream<Token = char>,
254    I::Error: ParseError<I::Token, I::Range, I::Position>,
255    I::Range: Range,
256{
257    spaces().with(choice((
258        choice((
259            attempt(constant("u8")).with(uint::<_, u8>().map(Value::U8)),
260            attempt(constant("u16")).with(uint::<_, u16>().map(Value::U16)),
261            attempt(constant("u32")).with(uint::<_, u32>().map(Value::U32)),
262            constant("u64").with(uint::<_, u64>().map(Value::U64)),
263            attempt(constant("i8")).with(int::<_, i8>().map(Value::I8)),
264            attempt(constant("i16")).with(int::<_, i16>().map(Value::I16)),
265            attempt(constant("i32")).with(int::<_, i32>().map(Value::I32)),
266            constant("i64").with(int::<_, i64>().map(Value::I64)),
267            attempt(constant("v32")).with(uint::<_, u32>().map(Value::V32)),
268            constant("v64").with(uint::<_, u64>().map(Value::V64)),
269            attempt(constant("z32")).with(int::<_, i32>().map(Value::Z32)),
270            constant("z64").with(int::<_, i64>().map(Value::Z64)),
271            attempt(constant("f32")).with(flt::<_, f32>().map(Value::F32)),
272            attempt(constant("f64")).with(flt::<_, f64>().map(Value::F64)),
273        )),
274        between(
275            token('['),
276            sptoken(']'),
277            sep_by_tok(value(must_escape, esc), csep(), token(']')),
278        )
279        .map(|mut vals: LPooled<Vec<Value>>| {
280            Value::Array(ValArray::from_iter_exact(vals.drain(..)))
281        }),
282        between(
283            token('{'),
284            sptoken('}'),
285            sep_by_tok(
286                (value(must_escape, esc), spstring("=>").with(value(must_escape, esc))),
287                csep(),
288                token('}'),
289            )
290            .map(|mut vals: LPooled<Vec<(Value, Value)>>| {
291                Value::Map(immutable_chunkmap::map::Map::from_iter(vals.drain(..)))
292            }),
293        ),
294        quoted(must_escape, esc).map(|s| Value::String(ArcStr::from(s))),
295        flt::<_, f64>().map(Value::F64),
296        int::<_, i64>().map(Value::I64),
297        string("true").map(|_| Value::Bool(true)),
298        string("false").map(|_| Value::Bool(false)),
299        string("null").map(|_| Value::Null),
300        constant("bytes")
301            .with(from_str(base64str()))
302            .map(|Base64Encoded(v)| Value::Bytes(PBytes::new(Bytes::from(v)))),
303        constant("abstract").with(from_str(base64str())).then(|Base64Encoded(v)| {
304            match Abstract::decode(&mut &v[..]) {
305                Ok(a) => combine::value(Value::Abstract(a)).right(),
306                Err(_) => unexpected_any("failed to unpack abstract").left(),
307            }
308        }),
309        constant("error")
310            .with(value(must_escape, esc))
311            .map(|v| Value::Error(Arc::new(v))),
312        attempt(constant("decimal"))
313            .with(flt::<_, Decimal>())
314            .map(|d| Value::Decimal(Arc::new(d))),
315        attempt(constant("datetime"))
316            .with(from_str(quoted(must_escape, esc)))
317            .map(|d| Value::DateTime(Arc::new(d))),
318        constant("duration")
319            .with(flt::<_, f64>().and(choice((
320                string("ns"),
321                string("us"),
322                string("ms"),
323                string("s"),
324            ))))
325            .map(|(n, suffix)| {
326                let d = match suffix {
327                    "ns" => Duration::from_secs_f64(n / 1e9),
328                    "us" => Duration::from_secs_f64(n / 1e6),
329                    "ms" => Duration::from_secs_f64(n / 1e3),
330                    "s" => Duration::from_secs_f64(n),
331                    _ => unreachable!(),
332                };
333                Value::Duration(Arc::new(d))
334            }),
335    )))
336}
337
338parser! {
339    pub fn value['a, I](
340        must_escape: &'static [char],
341        esc: &'a Escape
342    )(I) -> Value
343    where [I: RangeStream<Token = char>, I::Range: Range]
344    {
345        value_(must_escape, esc)
346    }
347}
348
349pub fn parse_value(s: &str) -> anyhow::Result<Value> {
350    value(&VAL_MUST_ESC, &VAL_ESC)
351        .skip(spaces())
352        .skip(eof())
353        .easy_parse(position::Stream::new(s))
354        .map(|(r, _)| r)
355        .map_err(|e| anyhow::anyhow!(format!("{}", e)))
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Map;
    use arcstr::literal;

    /// Parse `src`, panicking if the parser rejects it.
    fn parsed(src: &str) -> Value {
        parse_value(src).unwrap()
    }

    #[test]
    fn parse() {
        // Numeric literals, typed and untyped.
        let numbers = [
            (Value::U32(23), "u32:23"),
            (Value::V32(42), "v32:42"),
            (Value::I32(-10), "i32:-10"),
            (Value::I32(12321), "i32:12321"),
            (Value::Z32(-99), "z32:-99"),
            (Value::U64(100), "u64:100"),
            (Value::V64(100), "v64:100"),
            (Value::I64(-100), "i64:-100"),
            (Value::I64(-100), "-100"),
            (Value::I64(100), "i64:100"),
            (Value::I64(100), "100"),
            (Value::Z64(-100), "z64:-100"),
            (Value::Z64(100), "z64:100"),
            (Value::F32(3.1415), "f32:3.1415"),
            (Value::F32(675.6), "f32:675.6"),
            (Value::F32(42.3435), "f32:42.3435"),
            (Value::F32(1.123e9), "f32:1.123e9"),
            (Value::F32(1e9), "f32:1e9"),
            (Value::F32(21.2443e-6), "f32:21.2443e-6"),
            (Value::F32(3.), "f32:3."),
            (Value::F64(3.1415), "f64:3.1415"),
            (Value::F64(3.1415), "3.1415"),
            (Value::F64(1.123e9), "1.123e9"),
            (Value::F64(1e9), "1e9"),
            (Value::F64(21.2443e-6), "21.2443e-6"),
            (Value::F64(3.), "f64:3."),
            (Value::F64(3.), "3."),
        ];
        for (expected, src) in numbers {
            assert_eq!(expected, parsed(src));
        }

        // Quoted strings, including escaped quotes and the empty string.
        let strings = [
            (
                ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#),
                r#""I've got a lovely \"bunch\" of (coconuts)""#,
            ),
            (ArcStr::new(), r#""""#),
            (ArcStr::from(r#"""#), r#""\"""#),
        ];
        for (expected, src) in strings {
            assert_eq!(Value::String(expected), parsed(src));
        }

        // Keywords; trailing whitespace is tolerated.
        let keywords = [
            (Value::Bool(true), "true"),
            (Value::Bool(true), "true "),
            (Value::Bool(false), "false"),
            (Value::Null, "null"),
        ];
        for (expected, src) in keywords {
            assert_eq!(expected, parsed(src));
        }

        assert_eq!(Value::error(literal!("error")), parsed(r#"error:"error""#));

        // Arrays, with and without a trailing separator.
        let items = [Value::I64(42), Value::String(literal!("hello world"))];
        let arr = ValArray::from_iter_exact(items.into_iter());
        assert_eq!(Value::Array(arr.clone()), parsed(r#"[42, "hello world", ]"#));
        assert_eq!(Value::Array(arr), parsed(r#"[42, "hello world"]"#));

        // Maps, with and without a trailing separator.
        let map = Map::from_iter([
            (Value::I64(42), Value::String(literal!("hello world"))),
            (Value::String(literal!("hello world")), Value::I64(42)),
        ]);
        assert_eq!(
            Value::Map(map.clone()),
            parsed(r#"{ 42 => "hello world", "hello world" => 42, }"#)
        );
        assert_eq!(
            Value::Map(map.clone()),
            parsed(r#"{ 42 => "hello world", "hello world" => 42}"#)
        )
    }
}