netidx_value/
parser.rs

1use crate::{pbuf::PBytes, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6    attempt, between, choice, from_str, many1, none_of, not_followed_by, one_of,
7    optional, parser,
8    parser::{
9        char::{alpha_num, digit, spaces, string},
10        combinator::recognize,
11        range::{take_while, take_while1},
12        repeat::escaped,
13    },
14    sep_by,
15    stream::{position, Range},
16    token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use rust_decimal::Decimal;
21use std::{borrow::Cow, result::Result, str::FromStr, sync::LazyLock, time::Duration};
22
/// Returns `true` when `c` is a control character.
///
/// Passed to `Escape::new` as the generic escape predicate of `VAL_ESC`.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
26
27pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
28pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
29    Escape::new(
30        '\\',
31        &['\\', '"', '\n', '\r', '\0', '\t'],
32        &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
33        Some(should_escape_generic),
34    )
35    .unwrap()
36});
37
38pub fn escaped_string<I>(
39    must_esc: &'static [char],
40    esc: &Escape,
41) -> impl Parser<I, Output = String>
42where
43    I: RangeStream<Token = char>,
44    I::Error: ParseError<I::Token, I::Range, I::Position>,
45    I::Range: Range,
46{
47    recognize(escaped(
48        take_while1(move |c| !must_esc.contains(&c)),
49        esc.get_escape_char(),
50        one_of(
51            esc.get_tr()
52                .iter()
53                .filter_map(|(_, s)| s.chars().next())
54                .chain(must_esc.iter().copied()),
55        ),
56    ))
57    .map(|s| match esc.unescape(&s) {
58        Cow::Borrowed(_) => s, // it didn't need unescaping, so just return it
59        Cow::Owned(s) => s,
60    })
61}
62
63fn quoted<I>(
64    must_escape: &'static [char],
65    esc: &Escape,
66) -> impl Parser<I, Output = String>
67where
68    I: RangeStream<Token = char>,
69    I::Error: ParseError<I::Token, I::Range, I::Position>,
70    I::Range: Range,
71{
72    spaces().with(between(token('"'), token('"'), escaped_string(must_escape, esc)))
73}
74
75fn uint<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
76where
77    I: RangeStream<Token = char>,
78    I::Error: ParseError<I::Token, I::Range, I::Position>,
79    I::Range: Range,
80{
81    many1(digit()).then(|s: CompactString| match s.parse::<T>() {
82        Ok(i) => combine::value(i).right(),
83        Err(_) => unexpected_any("invalid unsigned integer").left(),
84    })
85}
86
87pub fn int<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
88where
89    I: RangeStream<Token = char>,
90    I::Error: ParseError<I::Token, I::Range, I::Position>,
91    I::Range: Range,
92{
93    recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
94        |s: CompactString| match s.parse::<T>() {
95            Ok(i) => combine::value(i).right(),
96            Err(_) => unexpected_any("invalid signed integer").left(),
97        },
98    )
99}
100
101fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
102where
103    I: RangeStream<Token = char>,
104    I::Error: ParseError<I::Token, I::Range, I::Position>,
105    I::Range: Range,
106{
107    choice((
108        attempt(recognize((
109            optional(token('-')),
110            take_while1(|c: char| c.is_digit(10)),
111            optional(token('.')),
112            take_while(|c: char| c.is_digit(10)),
113            token('e'),
114            optional(token('-')),
115            take_while1(|c: char| c.is_digit(10)),
116        ))),
117        attempt(recognize((
118            optional(token('-')),
119            take_while1(|c: char| c.is_digit(10)),
120            token('.'),
121            take_while(|c: char| c.is_digit(10)),
122        ))),
123    ))
124    .then(|s: CompactString| match s.parse::<T>() {
125        Ok(i) => combine::value(i).right(),
126        Err(_) => unexpected_any("invalid float").left(),
127    })
128}
129
130fn dcml<I>() -> impl Parser<I, Output = Decimal>
131where
132    I: RangeStream<Token = char>,
133    I::Error: ParseError<I::Token, I::Range, I::Position>,
134    I::Range: Range,
135{
136    recognize((
137        optional(token('-')),
138        take_while1(|c: char| c.is_digit(10)),
139        optional(token('.')),
140        take_while(|c: char| c.is_digit(10)),
141    ))
142    .then(|s: CompactString| match s.parse::<Decimal>() {
143        Ok(i) => combine::value(i).right(),
144        Err(_) => unexpected_any("invalid decimal").left(),
145    })
146}
147
148struct Base64Encoded(Vec<u8>);
149
150impl FromStr for Base64Encoded {
151    type Err = base64::DecodeError;
152
153    fn from_str(s: &str) -> Result<Self, Self::Err> {
154        BASE64.decode(s).map(Base64Encoded)
155    }
156}
157
158fn base64str<I>() -> impl Parser<I, Output = String>
159where
160    I: RangeStream<Token = char>,
161    I::Error: ParseError<I::Token, I::Range, I::Position>,
162    I::Range: Range,
163{
164    recognize((
165        take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
166        take_while(|c: char| c == '='),
167    ))
168}
169
170fn constant<I>(typ: &'static str) -> impl Parser<I, Output = char>
171where
172    I: RangeStream<Token = char>,
173    I::Error: ParseError<I::Token, I::Range, I::Position>,
174    I::Range: Range,
175{
176    string(typ).with(token(':'))
177}
178
179pub fn close_expr<I>() -> impl Parser<I, Output = ()>
180where
181    I: RangeStream<Token = char>,
182    I::Error: ParseError<I::Token, I::Range, I::Position>,
183    I::Range: Range,
184{
185    not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
186}
187
188fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
189where
190    I: RangeStream<Token = char>,
191    I::Error: ParseError<I::Token, I::Range, I::Position>,
192    I::Range: Range,
193{
194    spaces().with(choice((
195        attempt(
196            between(token('['), token(']'), sep_by(value(must_escape, esc), token(',')))
197                .map(|vals: Vec<Value>| Value::Array(vals.into())),
198        ),
199        attempt(quoted(must_escape, esc)).map(|s| Value::String(ArcStr::from(s))),
200        attempt(flt::<_, f64>()).map(Value::F64),
201        attempt(int::<_, i64>()).map(Value::I64),
202        attempt(
203            string("true").skip(not_followed_by(alpha_num())).map(|_| Value::Bool(true)),
204        ),
205        attempt(
206            string("false")
207                .skip(not_followed_by(alpha_num()))
208                .map(|_| Value::Bool(false)),
209        ),
210        attempt(string("null").skip(not_followed_by(alpha_num())).map(|_| Value::Null)),
211        attempt(constant("decimal").with(dcml()).map(Value::Decimal)),
212        attempt(constant("u32").with(uint::<_, u32>()).map(Value::U32)),
213        attempt(constant("v32").with(uint::<_, u32>()).map(Value::V32)),
214        attempt(constant("i32").with(int::<_, i32>()).map(Value::I32)),
215        attempt(constant("z32").with(int::<_, i32>()).map(Value::Z32)),
216        attempt(constant("u64").with(uint::<_, u64>()).map(Value::U64)),
217        attempt(constant("v64").with(uint::<_, u64>()).map(Value::V64)),
218        attempt(constant("i64").with(int::<_, i64>()).map(Value::I64)),
219        attempt(constant("z64").with(int::<_, i64>()).map(Value::Z64)),
220        attempt(constant("f32").with(flt::<_, f32>()).map(Value::F32)),
221        attempt(constant("f64").with(flt::<_, f64>()).map(Value::F64)),
222        attempt(
223            constant("bytes")
224                .with(from_str(base64str()))
225                .map(|Base64Encoded(v)| Value::Bytes(PBytes::new(Bytes::from(v)))),
226        ),
227        attempt(
228            constant("error")
229                .with(quoted(must_escape, esc))
230                .map(|s| Value::Error(ArcStr::from(s))),
231        ),
232        attempt(
233            constant("datetime")
234                .with(from_str(quoted(must_escape, esc)))
235                .map(|d| Value::DateTime(d)),
236        ),
237        attempt(
238            constant("duration")
239                .with(flt::<_, f64>().and(choice((
240                    string("ns"),
241                    string("us"),
242                    string("ms"),
243                    string("s"),
244                ))))
245                .map(|(n, suffix)| {
246                    let d = match suffix {
247                        "ns" => Duration::from_secs_f64(n / 1e9),
248                        "us" => Duration::from_secs_f64(n / 1e6),
249                        "ms" => Duration::from_secs_f64(n / 1e3),
250                        "s" => Duration::from_secs_f64(n),
251                        _ => unreachable!(),
252                    };
253                    Value::Duration(d)
254                }),
255        ),
256    )))
257}
258
259parser! {
260    pub fn value['a, I](
261        must_escape: &'static [char],
262        esc: &'a Escape
263    )(I) -> Value
264    where [I: RangeStream<Token = char>, I::Range: Range]
265    {
266        value_(must_escape, esc)
267    }
268}
269
270pub fn parse_value(s: &str) -> anyhow::Result<Value> {
271    value(&VAL_MUST_ESC, &VAL_ESC)
272        .easy_parse(position::Stream::new(s))
273        .map(|(r, _)| r)
274        .map_err(|e| anyhow::anyhow!(format!("{}", e)))
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that each input parses to the value paired with it.
    fn check<const N: usize>(cases: [(Value, &str); N]) {
        for (expected, input) in cases {
            assert_eq!(expected, parse_value(input).unwrap());
        }
    }

    #[test]
    fn parse_integers() {
        check([
            (Value::U32(23), "u32:23"),
            (Value::V32(42), "v32:42"),
            (Value::I32(-10), "i32:-10"),
            (Value::I32(12321), "i32:12321"),
            (Value::Z32(-99), "z32:-99"),
            (Value::U64(100), "u64:100"),
            (Value::V64(100), "v64:100"),
            (Value::I64(-100), "i64:-100"),
            (Value::I64(-100), "-100"),
            (Value::I64(100), "i64:100"),
            (Value::I64(100), "100"),
            (Value::Z64(-100), "z64:-100"),
            (Value::Z64(100), "z64:100"),
        ]);
    }

    #[test]
    fn parse_floats() {
        check([
            (Value::F32(3.1415), "f32:3.1415"),
            (Value::F32(675.6), "f32:675.6"),
            (Value::F32(42.3435), "f32:42.3435"),
            (Value::F32(1.123e9), "f32:1.123e9"),
            (Value::F32(1e9), "f32:1e9"),
            (Value::F32(21.2443e-6), "f32:21.2443e-6"),
            (Value::F32(3.), "f32:3."),
            (Value::F64(3.1415), "f64:3.1415"),
            (Value::F64(3.1415), "3.1415"),
            (Value::F64(1.123e9), "1.123e9"),
            (Value::F64(1e9), "1e9"),
            (Value::F64(21.2443e-6), "21.2443e-6"),
            (Value::F64(3.), "f64:3."),
            (Value::F64(3.), "3."),
        ]);
    }

    #[test]
    fn parse_strings() {
        // Escaped quotes inside the string are unescaped by the parser.
        let expected = ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#);
        let input = r#""I've got a lovely \"bunch\" of (coconuts)""#;
        assert_eq!(Value::String(expected), parse_value(input).unwrap());

        // The empty string.
        assert_eq!(Value::String(ArcStr::new()), parse_value(r#""""#).unwrap());

        // A string consisting of a single escaped quote.
        let expected = ArcStr::from(r#"""#);
        let input = r#""\"""#;
        assert_eq!(Value::String(expected), parse_value(input).unwrap());
    }

    #[test]
    fn parse_keywords_and_errors() {
        check([
            (Value::Bool(true), "true"),
            (Value::Bool(true), "true "),
            (Value::Bool(false), "false"),
            (Value::Null, "null"),
            (Value::Error(ArcStr::from("error")), r#"error:"error""#),
        ]);
    }
}
327}