netidx_value/
parser.rs

1use crate::{pbuf::PBytes, ValArray, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6    attempt, between, choice, from_str, many1, none_of, not_followed_by, one_of,
7    optional, parser,
8    parser::{
9        char::{alpha_num, digit, spaces, string},
10        combinator::recognize,
11        range::{take_while, take_while1},
12        repeat::escaped,
13    },
14    sep_by,
15    stream::{position, Range},
16    token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use poolshark::local::LPooled;
21use rust_decimal::Decimal;
22use std::{borrow::Cow, result::Result, str::FromStr, sync::LazyLock, time::Duration};
23use triomphe::Arc;
24
/// Returns `true` when `c` is a control character.
///
/// Handed to `Escape::new` in `VAL_ESC` as its optional predicate argument.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
28
29pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
30pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
31    Escape::new(
32        '\\',
33        &['\\', '"', '\n', '\r', '\0', '\t'],
34        &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
35        Some(should_escape_generic),
36    )
37    .unwrap()
38});
39
40pub fn escaped_string<I>(
41    must_esc: &'static [char],
42    esc: &Escape,
43) -> impl Parser<I, Output = String>
44where
45    I: RangeStream<Token = char>,
46    I::Error: ParseError<I::Token, I::Range, I::Position>,
47    I::Range: Range,
48{
49    recognize(escaped(
50        take_while1(move |c| !must_esc.contains(&c)),
51        esc.get_escape_char(),
52        one_of(
53            esc.get_tr()
54                .iter()
55                .filter_map(|(_, s)| s.chars().next())
56                .chain(must_esc.iter().copied()),
57        ),
58    ))
59    .map(|s| match esc.unescape(&s) {
60        Cow::Borrowed(_) => s, // it didn't need unescaping, so just return it
61        Cow::Owned(s) => s,
62    })
63}
64
65fn quoted<I>(
66    must_escape: &'static [char],
67    esc: &Escape,
68) -> impl Parser<I, Output = String>
69where
70    I: RangeStream<Token = char>,
71    I::Error: ParseError<I::Token, I::Range, I::Position>,
72    I::Range: Range,
73{
74    spaces().with(between(token('"'), token('"'), escaped_string(must_escape, esc)))
75}
76
77fn uint<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
78where
79    I: RangeStream<Token = char>,
80    I::Error: ParseError<I::Token, I::Range, I::Position>,
81    I::Range: Range,
82{
83    many1(digit()).then(|s: CompactString| match s.parse::<T>() {
84        Ok(i) => combine::value(i).right(),
85        Err(_) => unexpected_any("invalid unsigned integer").left(),
86    })
87}
88
89pub fn int<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
90where
91    I: RangeStream<Token = char>,
92    I::Error: ParseError<I::Token, I::Range, I::Position>,
93    I::Range: Range,
94{
95    recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
96        |s: CompactString| match s.parse::<T>() {
97            Ok(i) => combine::value(i).right(),
98            Err(_) => unexpected_any("invalid signed integer").left(),
99        },
100    )
101}
102
103fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
104where
105    I: RangeStream<Token = char>,
106    I::Error: ParseError<I::Token, I::Range, I::Position>,
107    I::Range: Range,
108{
109    choice((
110        attempt(recognize((
111            optional(token('-')),
112            take_while1(|c: char| c.is_digit(10)),
113            optional(token('.')),
114            take_while(|c: char| c.is_digit(10)),
115            token('e'),
116            optional(token('-')),
117            take_while1(|c: char| c.is_digit(10)),
118        ))),
119        attempt(recognize((
120            optional(token('-')),
121            take_while1(|c: char| c.is_digit(10)),
122            token('.'),
123            take_while(|c: char| c.is_digit(10)),
124        ))),
125    ))
126    .then(|s: CompactString| match s.parse::<T>() {
127        Ok(i) => combine::value(i).right(),
128        Err(_) => unexpected_any("invalid float").left(),
129    })
130}
131
132struct Base64Encoded(Vec<u8>);
133
134impl FromStr for Base64Encoded {
135    type Err = base64::DecodeError;
136
137    fn from_str(s: &str) -> Result<Self, Self::Err> {
138        BASE64.decode(s).map(Base64Encoded)
139    }
140}
141
142fn base64str<I>() -> impl Parser<I, Output = String>
143where
144    I: RangeStream<Token = char>,
145    I::Error: ParseError<I::Token, I::Range, I::Position>,
146    I::Range: Range,
147{
148    choice((
149        attempt(string("null")).map(|_| String::new()),
150        recognize((
151            take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
152            take_while(|c: char| c == '='),
153        )),
154    ))
155}
156
157fn constant<I>(typ: &'static str) -> impl Parser<I, Output = ()>
158where
159    I: RangeStream<Token = char>,
160    I::Error: ParseError<I::Token, I::Range, I::Position>,
161    I::Range: Range,
162{
163    string(typ).with(spaces()).with(token(':')).with(spaces()).map(|_| ())
164}
165
166pub fn close_expr<I>() -> impl Parser<I, Output = ()>
167where
168    I: RangeStream<Token = char>,
169    I::Error: ParseError<I::Token, I::Range, I::Position>,
170    I::Range: Range,
171{
172    not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
173}
174
175fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
176where
177    I: RangeStream<Token = char>,
178    I::Error: ParseError<I::Token, I::Range, I::Position>,
179    I::Range: Range,
180{
181    spaces().with(choice((
182        attempt(
183            between(
184                token('['),
185                token(']'),
186                sep_by(value(must_escape, esc), attempt(spaces().with(token(',')))),
187            )
188            .map(|mut vals: LPooled<Vec<Value>>| {
189                Value::Array(ValArray::from_iter_exact(vals.drain(..)))
190            }),
191        ),
192        attempt(between(
193            token('{'),
194            token('}'),
195            sep_by(
196                (
197                    value(must_escape, esc),
198                    spaces().with(string("=>")).with(value(must_escape, esc)),
199                ),
200                attempt(spaces().with(token(','))),
201            )
202            .map(|mut vals: LPooled<Vec<(Value, Value)>>| {
203                Value::Map(immutable_chunkmap::map::Map::from_iter(vals.drain(..)))
204            }),
205        )),
206        attempt(quoted(must_escape, esc)).map(|s| Value::String(ArcStr::from(s))),
207        attempt(flt::<_, f64>()).map(Value::F64),
208        attempt(int::<_, i64>()).map(Value::I64),
209        attempt(
210            string("true").skip(not_followed_by(alpha_num())).map(|_| Value::Bool(true)),
211        ),
212        attempt(
213            string("false")
214                .skip(not_followed_by(alpha_num()))
215                .map(|_| Value::Bool(false)),
216        ),
217        attempt(string("null").skip(not_followed_by(alpha_num())).map(|_| Value::Null)),
218        attempt(
219            constant("decimal")
220                .with(flt::<_, Decimal>())
221                .map(|d| Value::Decimal(Arc::new(d))),
222        ),
223        attempt(constant("u32").with(uint::<_, u32>()).map(Value::U32)),
224        attempt(constant("v32").with(uint::<_, u32>()).map(Value::V32)),
225        attempt(constant("i32").with(int::<_, i32>()).map(Value::I32)),
226        attempt(constant("z32").with(int::<_, i32>()).map(Value::Z32)),
227        attempt(constant("u64").with(uint::<_, u64>()).map(Value::U64)),
228        attempt(constant("v64").with(uint::<_, u64>()).map(Value::V64)),
229        attempt(constant("i64").with(int::<_, i64>()).map(Value::I64)),
230        attempt(constant("z64").with(int::<_, i64>()).map(Value::Z64)),
231        attempt(constant("f32").with(flt::<_, f32>()).map(Value::F32)),
232        attempt(constant("f64").with(flt::<_, f64>()).map(Value::F64)),
233        attempt(
234            constant("bytes")
235                .with(from_str(base64str()))
236                .map(|Base64Encoded(v)| Value::Bytes(PBytes::new(Bytes::from(v)))),
237        ),
238        attempt(
239            constant("error")
240                .with(value(must_escape, esc))
241                .map(|v| Value::Error(Arc::new(v))),
242        ),
243        attempt(
244            constant("datetime")
245                .with(from_str(quoted(must_escape, esc)))
246                .map(|d| Value::DateTime(Arc::new(d))),
247        ),
248        attempt(
249            constant("duration")
250                .with(flt::<_, f64>().and(choice((
251                    string("ns"),
252                    string("us"),
253                    string("ms"),
254                    string("s"),
255                ))))
256                .map(|(n, suffix)| {
257                    let d = match suffix {
258                        "ns" => Duration::from_secs_f64(n / 1e9),
259                        "us" => Duration::from_secs_f64(n / 1e6),
260                        "ms" => Duration::from_secs_f64(n / 1e3),
261                        "s" => Duration::from_secs_f64(n),
262                        _ => unreachable!(),
263                    };
264                    Value::Duration(Arc::new(d))
265                }),
266        ),
267    )))
268}
269
parser! {
    /// Public parser for a single `Value`; forwards to `value_`.
    ///
    /// `value_` refers back to this parser for nested values (array
    /// elements, map keys and values, and the payload of `error:`), so
    /// this macro-defined parser is what the recursive grammar goes through.
    ///
    /// * `must_escape` - characters that may not appear unescaped in strings
    /// * `esc` - escaping configuration used to unescape string contents
    pub fn value['a, I](
        must_escape: &'static [char],
        esc: &'a Escape
    )(I) -> Value
    where [I: RangeStream<Token = char>, I::Range: Range]
    {
        value_(must_escape, esc)
    }
}
280
281pub fn parse_value(s: &str) -> anyhow::Result<Value> {
282    value(&VAL_MUST_ESC, &VAL_ESC)
283        .easy_parse(position::Stream::new(s))
284        .map(|(r, _)| r)
285        .map_err(|e| anyhow::anyhow!(format!("{}", e)))
286}
287
#[cfg(test)]
mod tests {
    use arcstr::literal;

    use super::*;

    /// Checks that each textual form parses to the expected `Value`.
    #[test]
    fn parse() {
        let coconuts = ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#);
        let cases: Vec<(Value, &str)> = vec![
            // tagged and untagged integers
            (Value::U32(23), "u32:23"),
            (Value::V32(42), "v32:42"),
            (Value::I32(-10), "i32:-10"),
            (Value::I32(12321), "i32:12321"),
            (Value::Z32(-99), "z32:-99"),
            (Value::U64(100), "u64:100"),
            (Value::V64(100), "v64:100"),
            (Value::I64(-100), "i64:-100"),
            (Value::I64(-100), "-100"),
            (Value::I64(100), "i64:100"),
            (Value::I64(100), "100"),
            (Value::Z64(-100), "z64:-100"),
            (Value::Z64(100), "z64:100"),
            // 32 bit floats
            (Value::F32(3.1415), "f32:3.1415"),
            (Value::F32(675.6), "f32:675.6"),
            (Value::F32(42.3435), "f32:42.3435"),
            (Value::F32(1.123e9), "f32:1.123e9"),
            (Value::F32(1e9), "f32:1e9"),
            (Value::F32(21.2443e-6), "f32:21.2443e-6"),
            (Value::F32(3.), "f32:3."),
            // 64 bit floats, tagged and untagged
            (Value::F64(3.1415), "f64:3.1415"),
            (Value::F64(3.1415), "3.1415"),
            (Value::F64(1.123e9), "1.123e9"),
            (Value::F64(1e9), "1e9"),
            (Value::F64(21.2443e-6), "21.2443e-6"),
            (Value::F64(3.), "f64:3."),
            (Value::F64(3.), "3."),
            // strings
            (Value::String(coconuts), r#""I've got a lovely \"bunch\" of (coconuts)""#),
            (Value::String(ArcStr::new()), r#""""#),
            (Value::String(ArcStr::from(r#"""#)), r#""\"""#),
            // keywords
            (Value::Bool(true), "true"),
            (Value::Bool(true), "true "),
            (Value::Bool(false), "false"),
            (Value::Null, "null"),
            // nested value
            (Value::error(literal!("error")), r#"error:"error""#),
        ];
        for (expected, input) in cases {
            assert_eq!(expected, parse_value(input).unwrap(), "input: {input:?}");
        }
    }
}