Skip to main content

netidx_value/
parser.rs

1use crate::{pbuf::PBytes, Abstract, ValArray, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6    attempt, between, choice, eof, from_str, look_ahead, many1, none_of, not_followed_by,
7    one_of, optional, parser, satisfy,
8    parser::{
9        char::{alpha_num, digit, spaces, string},
10        combinator::recognize,
11        range::{take_while, take_while1},
12        repeat::escaped,
13    },
14    sep_by, sep_by1,
15    stream::{position, Range},
16    token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use netidx_core::pack::Pack;
21use poolshark::local::LPooled;
22use rust_decimal::Decimal;
23use std::{borrow::Cow, str::FromStr, sync::LazyLock, time::Duration};
24use triomphe::Arc;
25
26// sep_by1, but a separator terminator is allowed, and ignored
27pub fn sep_by1_tok<I, O, OC, EP, SP, TP>(
28    p: EP,
29    sep: SP,
30    term: TP,
31) -> impl Parser<I, Output = OC>
32where
33    I: RangeStream<Token = char>,
34    I::Error: ParseError<I::Token, I::Range, I::Position>,
35    I::Range: Range,
36    OC: Extend<O> + Default,
37    SP: Parser<I>,
38    EP: Parser<I, Output = O>,
39    TP: Parser<I>,
40{
41    sep_by1(choice((look_ahead(term).map(|_| None::<O>), p.map(Some))), sep).map(
42        |mut e: LPooled<Vec<Option<O>>>| {
43            let mut res = OC::default();
44            res.extend(e.drain(..).filter_map(|e| e));
45            res
46        },
47    )
48}
49
50// sep_by, but a separator terminator is allowed, and ignored
51pub fn sep_by_tok<I, O, OC, EP, SP, TP>(
52    p: EP,
53    sep: SP,
54    term: TP,
55) -> impl Parser<I, Output = OC>
56where
57    I: RangeStream<Token = char>,
58    I::Error: ParseError<I::Token, I::Range, I::Position>,
59    I::Range: Range,
60    OC: Extend<O> + Default,
61    SP: Parser<I>,
62    EP: Parser<I, Output = O>,
63    TP: Parser<I>,
64{
65    sep_by(choice((look_ahead(term).map(|_| None::<O>), p.map(Some))), sep).map(
66        |mut e: LPooled<Vec<Option<O>>>| {
67            let mut res = OC::default();
68            res.extend(e.drain(..).filter_map(|e| e));
69            res
70        },
71    )
72}
73
74pub fn not_prefix<I>() -> impl Parser<I, Output = ()>
75where
76    I: RangeStream<Token = char>,
77    I::Error: ParseError<I::Token, I::Range, I::Position>,
78    I::Range: Range,
79{
80    not_followed_by(choice((token('_'), alpha_num())))
81}
82
83fn sptoken<I>(t: char) -> impl Parser<I, Output = char>
84where
85    I: RangeStream<Token = char>,
86    I::Error: ParseError<I::Token, I::Range, I::Position>,
87    I::Range: Range,
88{
89    spaces().with(token(t))
90}
91
92fn spstring<I>(t: &'static str) -> impl Parser<I, Output = &'static str>
93where
94    I: RangeStream<Token = char>,
95    I::Error: ParseError<I::Token, I::Range, I::Position>,
96    I::Range: Range,
97{
98    spaces().with(string(t))
99}
100
101fn csep<I>() -> impl Parser<I, Output = char>
102where
103    I: RangeStream<Token = char>,
104    I::Error: ParseError<I::Token, I::Range, I::Position>,
105    I::Range: Range,
106{
107    attempt(spaces().with(token(','))).skip(spaces())
108}
109
/// Predicate handed to [`VAL_ESC`]: true exactly for control characters.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
113
114pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
115pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
116    Escape::new(
117        '\\',
118        &['\\', '"', '\n', '\r', '\0', '\t'],
119        &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
120        Some(should_escape_generic),
121    )
122    .unwrap()
123});
124
125pub fn escaped_string<I>(
126    must_esc: &'static [char],
127    esc: &Escape,
128) -> impl Parser<I, Output = String>
129where
130    I: RangeStream<Token = char>,
131    I::Error: ParseError<I::Token, I::Range, I::Position>,
132    I::Range: Range,
133{
134    recognize(escaped(
135        take_while1(move |c| !must_esc.contains(&c)),
136        esc.get_escape_char(),
137        one_of(
138            esc.get_tr()
139                .iter()
140                .filter_map(|(_, s)| s.chars().next())
141                .chain(must_esc.iter().copied()),
142        ),
143    ))
144    .map(|s| match esc.unescape(&s) {
145        Cow::Borrowed(_) => s, // it didn't need unescaping, so just return it
146        Cow::Owned(s) => s,
147    })
148}
149
150fn quoted<I>(
151    must_escape: &'static [char],
152    esc: &Escape,
153) -> impl Parser<I, Output = String>
154where
155    I: RangeStream<Token = char>,
156    I::Error: ParseError<I::Token, I::Range, I::Position>,
157    I::Range: Range,
158{
159    between(token('"'), token('"'), escaped_string(must_escape, esc))
160}
161
/// Integer types that can be parsed from a string of digits in a given radix.
///
/// This gives generic code access to the inherent `from_str_radix` function
/// that every primitive integer type provides.
pub trait FromStrRadix: Sized {
    /// Parse `s` as an integer written in base `radix`.
    fn from_str_radix(s: &str, radix: u32) -> Result<Self, std::num::ParseIntError>;
}

// Implements `FromStrRadix` for each listed type by forwarding to the type's
// inherent `from_str_radix`, peeling one type off the list per expansion.
macro_rules! impl_from_str_radix {
    () => {};
    ($head:ty $(, $tail:ty)*) => {
        impl FromStrRadix for $head {
            fn from_str_radix(s: &str, radix: u32) -> Result<Self, std::num::ParseIntError> {
                // Inherent associated functions take precedence over trait
                // ones, so this calls the primitive's own parser rather than
                // recursing.
                <$head>::from_str_radix(s, radix)
            }
        }
        impl_from_str_radix!($($tail),*);
    };
}

impl_from_str_radix!(u8, i8, u16, i16, u32, i32, u64, i64, usize, isize);
177
178fn radix_prefix<I>() -> impl Parser<I, Output = (u32, CompactString)>
179where
180    I: RangeStream<Token = char>,
181    I::Error: ParseError<I::Token, I::Range, I::Position>,
182    I::Range: Range,
183{
184    choice((
185        attempt(
186            token('0')
187                .with(one_of(['x', 'X']))
188                .with(many1(satisfy(|c: char| c.is_ascii_hexdigit())))
189                .map(|s: CompactString| (16u32, s)),
190        ),
191        attempt(
192            token('0')
193                .with(one_of(['b', 'B']))
194                .with(many1(satisfy(|c: char| c == '0' || c == '1')))
195                .map(|s: CompactString| (2u32, s)),
196        ),
197        attempt(
198            token('0')
199                .with(one_of(['o', 'O']))
200                .with(many1(satisfy(|c: char| c.is_digit(8))))
201                .map(|s: CompactString| (8u32, s)),
202        ),
203    ))
204}
205
206fn uint<I, T: FromStrRadix + Clone + Copy>() -> impl Parser<I, Output = T>
207where
208    I: RangeStream<Token = char>,
209    I::Error: ParseError<I::Token, I::Range, I::Position>,
210    I::Range: Range,
211{
212    choice((
213        radix_prefix(),
214        many1(digit()).map(|s: CompactString| (10u32, s)),
215    ))
216    .then(|(radix, digits): (u32, CompactString)| {
217        match T::from_str_radix(&digits, radix) {
218            Ok(i) => combine::value(i).right(),
219            Err(_) => unexpected_any("invalid unsigned integer").left(),
220        }
221    })
222}
223
224pub fn int<I, T: FromStrRadix + Clone + Copy>() -> impl Parser<I, Output = T>
225where
226    I: RangeStream<Token = char>,
227    I::Error: ParseError<I::Token, I::Range, I::Position>,
228    I::Range: Range,
229{
230    choice((
231        attempt(optional(token('-')).and(radix_prefix())).then(
232            |(sign, (radix, digits)): (Option<char>, (u32, CompactString))| {
233                let s = if sign.is_some() {
234                    let mut s = CompactString::new("-");
235                    s.push_str(&digits);
236                    s
237                } else {
238                    digits
239                };
240                match T::from_str_radix(&s, radix) {
241                    Ok(i) => combine::value(i).right(),
242                    Err(_) => unexpected_any("invalid signed integer").left(),
243                }
244            },
245        ),
246        recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
247            |s: CompactString| match T::from_str_radix(&s, 10) {
248                Ok(i) => combine::value(i).right(),
249                Err(_) => unexpected_any("invalid signed integer").left(),
250            },
251        ),
252    ))
253}
254
255fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
256where
257    I: RangeStream<Token = char>,
258    I::Error: ParseError<I::Token, I::Range, I::Position>,
259    I::Range: Range,
260{
261    choice((
262        attempt(recognize((
263            optional(token('-')),
264            take_while1(|c: char| c.is_digit(10)),
265            optional(token('.')),
266            take_while(|c: char| c.is_digit(10)),
267            token('e'),
268            optional(token('-')),
269            take_while1(|c: char| c.is_digit(10)),
270        ))),
271        attempt(recognize((
272            optional(token('-')),
273            take_while1(|c: char| c.is_digit(10)),
274            token('.'),
275            take_while(|c: char| c.is_digit(10)),
276        ))),
277    ))
278    .then(|s: CompactString| match s.parse::<T>() {
279        Ok(i) => combine::value(i).right(),
280        Err(_) => unexpected_any("invalid float").left(),
281    })
282}
283
284fn base64<I>() -> impl Parser<I, Output = LPooled<Vec<u8>>>
285where
286    I: RangeStream<Token = char>,
287    I::Error: ParseError<I::Token, I::Range, I::Position>,
288    I::Range: Range,
289{
290    recognize((
291        take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
292        take_while(|c: char| c == '='),
293    ))
294    .then(|s: LPooled<String>| {
295        let s = if &*s == "==" { LPooled::take() } else { s };
296        let mut buf: LPooled<Vec<u8>> = LPooled::take();
297        match BASE64.decode_vec(&*s, &mut buf) {
298            Ok(()) => combine::value(buf).right(),
299            Err(_) => unexpected_any("base64 decode failed").left(),
300        }
301    })
302}
303
304fn constant<I>(typ: &'static str) -> impl Parser<I, Output = ()>
305where
306    I: RangeStream<Token = char>,
307    I::Error: ParseError<I::Token, I::Range, I::Position>,
308    I::Range: Range,
309{
310    string(typ).with(spaces()).with(token(':')).with(spaces()).map(|_| ())
311}
312
313pub fn close_expr<I>() -> impl Parser<I, Output = ()>
314where
315    I: RangeStream<Token = char>,
316    I::Error: ParseError<I::Token, I::Range, I::Position>,
317    I::Range: Range,
318{
319    not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
320}
321
322fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
323where
324    I: RangeStream<Token = char>,
325    I::Error: ParseError<I::Token, I::Range, I::Position>,
326    I::Range: Range,
327{
328    spaces().with(choice((
329        choice((
330            attempt(constant("u8")).with(uint::<_, u8>().map(Value::U8)),
331            attempt(constant("u16")).with(uint::<_, u16>().map(Value::U16)),
332            attempt(constant("u32")).with(uint::<_, u32>().map(Value::U32)),
333            constant("u64").with(uint::<_, u64>().map(Value::U64)),
334            attempt(constant("i8")).with(int::<_, i8>().map(Value::I8)),
335            attempt(constant("i16")).with(int::<_, i16>().map(Value::I16)),
336            attempt(constant("i32")).with(int::<_, i32>().map(Value::I32)),
337            constant("i64").with(int::<_, i64>().map(Value::I64)),
338            attempt(constant("v32")).with(uint::<_, u32>().map(Value::V32)),
339            constant("v64").with(uint::<_, u64>().map(Value::V64)),
340            attempt(constant("z32")).with(int::<_, i32>().map(Value::Z32)),
341            constant("z64").with(int::<_, i64>().map(Value::Z64)),
342            attempt(constant("f32")).with(flt::<_, f32>().map(Value::F32)),
343            attempt(constant("f64")).with(flt::<_, f64>().map(Value::F64)),
344        )),
345        between(
346            token('['),
347            sptoken(']'),
348            sep_by_tok(value(must_escape, esc), csep(), token(']')),
349        )
350        .map(|mut vals: LPooled<Vec<Value>>| {
351            Value::Array(ValArray::from_iter_exact(vals.drain(..)))
352        }),
353        between(
354            token('{'),
355            sptoken('}'),
356            sep_by_tok(
357                (value(must_escape, esc), spstring("=>").with(value(must_escape, esc))),
358                csep(),
359                token('}'),
360            )
361            .map(|mut vals: LPooled<Vec<(Value, Value)>>| {
362                Value::Map(immutable_chunkmap::map::Map::from_iter(vals.drain(..)))
363            }),
364        ),
365        quoted(must_escape, esc).map(|s| Value::String(ArcStr::from(s))),
366        flt::<_, f64>().map(Value::F64),
367        int::<_, i64>().map(Value::I64),
368        attempt(string("true").skip(not_prefix())).map(|_| Value::Bool(true)),
369        attempt(string("false").skip(not_prefix())).map(|_| Value::Bool(false)),
370        attempt(string("null").skip(not_prefix())).map(|_| Value::Null),
371        constant("bytes")
372            .with(base64())
373            .map(|v| Value::Bytes(PBytes::new(Bytes::from(LPooled::detach(v))))),
374        constant("abstract").with(base64()).then(|v| {
375            match Abstract::decode(&mut &v[..]) {
376                Ok(a) => combine::value(Value::Abstract(a)).right(),
377                Err(_) => unexpected_any("failed to unpack abstract").left(),
378            }
379        }),
380        constant("error")
381            .with(value(must_escape, esc))
382            .map(|v| Value::Error(Arc::new(v))),
383        attempt(constant("decimal"))
384            .with(flt::<_, Decimal>())
385            .map(|d| Value::Decimal(Arc::new(d))),
386        attempt(constant("datetime"))
387            .with(from_str(quoted(must_escape, esc)))
388            .map(|d| Value::DateTime(Arc::new(d))),
389        constant("duration")
390            .with(flt::<_, f64>().and(choice((
391                string("ns"),
392                string("us"),
393                string("ms"),
394                string("s"),
395            ))))
396            .map(|(n, suffix)| {
397                let d = match suffix {
398                    "ns" => Duration::from_secs_f64(n / 1e9),
399                    "us" => Duration::from_secs_f64(n / 1e6),
400                    "ms" => Duration::from_secs_f64(n / 1e3),
401                    "s" => Duration::from_secs_f64(n),
402                    _ => unreachable!(),
403                };
404                Value::Duration(Arc::new(d))
405            }),
406    )))
407}
408
// Public entry point for parsing a `Value`, defined through combine's
// `parser!` macro so that it has a nameable type and can be referenced
// recursively from `value_` (arrays, maps and `error:` contain nested values).
//
// `must_escape` lists the characters that must be escaped inside quoted
// strings; `esc` is the escape configuration used to unescape them. The body
// simply delegates to `value_`.
parser! {
    pub fn value['a, I](
        must_escape: &'static [char],
        esc: &'a Escape
    )(I) -> Value
    where [I: RangeStream<Token = char>, I::Range: Range]
    {
        value_(must_escape, esc)
    }
}
419
420pub fn parse_value(s: &str) -> anyhow::Result<Value> {
421    value(&VAL_MUST_ESC, &VAL_ESC)
422        .skip(spaces())
423        .skip(eof())
424        .easy_parse(position::Stream::new(s))
425        .map(|(r, _)| r)
426        .map_err(|e| anyhow::anyhow!(format!("{}", e)))
427}
428
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Map;
    use arcstr::literal;

    // Parse `src`, panicking if the parser rejects it.
    fn parsed(src: &str) -> Value {
        parse_value(src).unwrap()
    }

    // Assert that every `(input, expected)` pair parses to its expected value.
    fn check_all<const N: usize>(cases: [(&str, Value); N]) {
        for (src, expected) in cases {
            assert_eq!(expected, parsed(src));
        }
    }

    #[test]
    fn parse() {
        // integers, typed and bare
        check_all([
            ("u32:23", Value::U32(23)),
            ("v32:42", Value::V32(42)),
            ("i32:-10", Value::I32(-10)),
            ("i32:12321", Value::I32(12321)),
            ("z32:-99", Value::Z32(-99)),
            ("u64:100", Value::U64(100)),
            ("v64:100", Value::V64(100)),
            ("i64:-100", Value::I64(-100)),
            ("-100", Value::I64(-100)),
            ("i64:100", Value::I64(100)),
            ("100", Value::I64(100)),
            ("z64:-100", Value::Z64(-100)),
            ("z64:100", Value::Z64(100)),
        ]);

        // floats, typed and bare
        check_all([
            ("f32:3.1415", Value::F32(3.1415)),
            ("f32:675.6", Value::F32(675.6)),
            ("f32:42.3435", Value::F32(42.3435)),
            ("f32:1.123e9", Value::F32(1.123e9)),
            ("f32:1e9", Value::F32(1e9)),
            ("f32:21.2443e-6", Value::F32(21.2443e-6)),
            ("f32:3.", Value::F32(3.)),
            ("f64:3.1415", Value::F64(3.1415)),
            ("3.1415", Value::F64(3.1415)),
            ("1.123e9", Value::F64(1.123e9)),
            ("1e9", Value::F64(1e9)),
            ("21.2443e-6", Value::F64(21.2443e-6)),
            ("f64:3.", Value::F64(3.)),
            ("3.", Value::F64(3.)),
        ]);

        // strings, including escaped quotes and the empty string
        check_all([
            (
                r#""I've got a lovely \"bunch\" of (coconuts)""#,
                Value::String(ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#)),
            ),
            (r#""""#, Value::String(ArcStr::new())),
            (r#""\"""#, Value::String(ArcStr::from(r#"""#))),
        ]);

        // keywords and errors
        check_all([
            ("true", Value::Bool(true)),
            ("true ", Value::Bool(true)),
            ("false", Value::Bool(false)),
            ("null", Value::Null),
            (r#"error:"error""#, Value::error(literal!("error"))),
        ]);

        // arrays, with and without a trailing comma
        let array = ValArray::from_iter_exact(
            [Value::I64(42), Value::String(literal!("hello world"))].into_iter(),
        );
        check_all([
            (r#"[42, "hello world", ]"#, Value::Array(array.clone())),
            (r#"[42, "hello world"]"#, Value::Array(array)),
        ]);

        // maps, with and without a trailing comma
        let map = Map::from_iter([
            (Value::I64(42), Value::String(literal!("hello world"))),
            (Value::String(literal!("hello world")), Value::I64(42)),
        ]);
        check_all([
            (
                r#"{ 42 => "hello world", "hello world" => 42, }"#,
                Value::Map(map.clone()),
            ),
            (
                r#"{ 42 => "hello world", "hello world" => 42}"#,
                Value::Map(map.clone()),
            ),
        ]);

        // hex literals
        check_all([
            ("u8:0xFF", Value::U8(255)),
            ("u8:0XFF", Value::U8(255)),
            ("i32:-0x1F", Value::I32(-31)),
            ("u64:0xDEAD", Value::U64(0xDEAD)),
            ("i64:0xFF", Value::I64(255)),
        ]);

        // binary literals
        check_all([
            ("u16:0b1010", Value::U16(10)),
            ("u16:0B1010", Value::U16(10)),
            ("i8:-0b1", Value::I8(-1)),
        ]);

        // octal literals
        check_all([
            ("u32:0o77", Value::U32(63)),
            ("u32:0O77", Value::U32(63)),
            ("i64:-0o10", Value::I64(-8)),
        ]);

        // bare hex/bin/oct as i64
        check_all([
            ("0xFF", Value::I64(255)),
            ("0b1010", Value::I64(10)),
            ("0o77", Value::I64(63)),
            ("-0xFF", Value::I64(-255)),
        ]);
    }
}