1use crate::{pbuf::PBytes, Abstract, ValArray, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6 attempt, between, choice, eof, from_str, look_ahead, many1, none_of, not_followed_by,
7 one_of, optional, parser, satisfy,
8 parser::{
9 char::{alpha_num, digit, spaces, string},
10 combinator::recognize,
11 range::{take_while, take_while1},
12 repeat::escaped,
13 },
14 sep_by, sep_by1,
15 stream::{position, Range},
16 token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use netidx_core::pack::Pack;
21use poolshark::local::LPooled;
22use rust_decimal::Decimal;
23use std::{borrow::Cow, str::FromStr, sync::LazyLock, time::Duration};
24use triomphe::Arc;
25
26pub fn sep_by1_tok<I, O, OC, EP, SP, TP>(
28 p: EP,
29 sep: SP,
30 term: TP,
31) -> impl Parser<I, Output = OC>
32where
33 I: RangeStream<Token = char>,
34 I::Error: ParseError<I::Token, I::Range, I::Position>,
35 I::Range: Range,
36 OC: Extend<O> + Default,
37 SP: Parser<I>,
38 EP: Parser<I, Output = O>,
39 TP: Parser<I>,
40{
41 sep_by1(choice((look_ahead(term).map(|_| None::<O>), p.map(Some))), sep).map(
42 |mut e: LPooled<Vec<Option<O>>>| {
43 let mut res = OC::default();
44 res.extend(e.drain(..).filter_map(|e| e));
45 res
46 },
47 )
48}
49
50pub fn sep_by_tok<I, O, OC, EP, SP, TP>(
52 p: EP,
53 sep: SP,
54 term: TP,
55) -> impl Parser<I, Output = OC>
56where
57 I: RangeStream<Token = char>,
58 I::Error: ParseError<I::Token, I::Range, I::Position>,
59 I::Range: Range,
60 OC: Extend<O> + Default,
61 SP: Parser<I>,
62 EP: Parser<I, Output = O>,
63 TP: Parser<I>,
64{
65 sep_by(choice((look_ahead(term).map(|_| None::<O>), p.map(Some))), sep).map(
66 |mut e: LPooled<Vec<Option<O>>>| {
67 let mut res = OC::default();
68 res.extend(e.drain(..).filter_map(|e| e));
69 res
70 },
71 )
72}
73
74pub fn not_prefix<I>() -> impl Parser<I, Output = ()>
75where
76 I: RangeStream<Token = char>,
77 I::Error: ParseError<I::Token, I::Range, I::Position>,
78 I::Range: Range,
79{
80 not_followed_by(choice((token('_'), alpha_num())))
81}
82
83fn sptoken<I>(t: char) -> impl Parser<I, Output = char>
84where
85 I: RangeStream<Token = char>,
86 I::Error: ParseError<I::Token, I::Range, I::Position>,
87 I::Range: Range,
88{
89 spaces().with(token(t))
90}
91
92fn spstring<I>(t: &'static str) -> impl Parser<I, Output = &'static str>
93where
94 I: RangeStream<Token = char>,
95 I::Error: ParseError<I::Token, I::Range, I::Position>,
96 I::Range: Range,
97{
98 spaces().with(string(t))
99}
100
101fn csep<I>() -> impl Parser<I, Output = char>
102where
103 I: RangeStream<Token = char>,
104 I::Error: ParseError<I::Token, I::Range, I::Position>,
105 I::Range: Range,
106{
107 attempt(spaces().with(token(','))).skip(spaces())
108}
109
/// Predicate handed to the escaper: returns `true` for control characters.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
113
114pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
115pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
116 Escape::new(
117 '\\',
118 &['\\', '"', '\n', '\r', '\0', '\t'],
119 &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
120 Some(should_escape_generic),
121 )
122 .unwrap()
123});
124
125pub fn escaped_string<I>(
126 must_esc: &'static [char],
127 esc: &Escape,
128) -> impl Parser<I, Output = String>
129where
130 I: RangeStream<Token = char>,
131 I::Error: ParseError<I::Token, I::Range, I::Position>,
132 I::Range: Range,
133{
134 recognize(escaped(
135 take_while1(move |c| !must_esc.contains(&c)),
136 esc.get_escape_char(),
137 one_of(
138 esc.get_tr()
139 .iter()
140 .filter_map(|(_, s)| s.chars().next())
141 .chain(must_esc.iter().copied()),
142 ),
143 ))
144 .map(|s| match esc.unescape(&s) {
145 Cow::Borrowed(_) => s, Cow::Owned(s) => s,
147 })
148}
149
150fn quoted<I>(
151 must_escape: &'static [char],
152 esc: &Escape,
153) -> impl Parser<I, Output = String>
154where
155 I: RangeStream<Token = char>,
156 I::Error: ParseError<I::Token, I::Range, I::Position>,
157 I::Range: Range,
158{
159 between(token('"'), token('"'), escaped_string(must_escape, esc))
160}
161
/// Abstraction over the primitive integers' inherent `from_str_radix`
/// constructors so the integer parsers can be generic over the target type.
pub trait FromStrRadix: Sized {
    /// Parse `s` as an integer written in base `radix`.
    fn from_str_radix(s: &str, radix: u32) -> Result<Self, std::num::ParseIntError>;
}

/// Implement [`FromStrRadix`] for each listed primitive integer type by
/// forwarding to that type's inherent `from_str_radix`.
macro_rules! impl_from_str_radix {
    ($($int:ty),*) => {
        $(
            impl FromStrRadix for $int {
                fn from_str_radix(
                    s: &str,
                    radix: u32,
                ) -> Result<Self, std::num::ParseIntError> {
                    // Inherent associated functions take precedence over the
                    // trait method, so this does not recurse.
                    <$int>::from_str_radix(s, radix)
                }
            }
        )*
    };
}

impl_from_str_radix!(u8, i8, u16, i16, u32, i32, u64, i64, usize, isize);
177
178fn radix_prefix<I>() -> impl Parser<I, Output = (u32, CompactString)>
179where
180 I: RangeStream<Token = char>,
181 I::Error: ParseError<I::Token, I::Range, I::Position>,
182 I::Range: Range,
183{
184 choice((
185 attempt(
186 token('0')
187 .with(one_of(['x', 'X']))
188 .with(many1(satisfy(|c: char| c.is_ascii_hexdigit())))
189 .map(|s: CompactString| (16u32, s)),
190 ),
191 attempt(
192 token('0')
193 .with(one_of(['b', 'B']))
194 .with(many1(satisfy(|c: char| c == '0' || c == '1')))
195 .map(|s: CompactString| (2u32, s)),
196 ),
197 attempt(
198 token('0')
199 .with(one_of(['o', 'O']))
200 .with(many1(satisfy(|c: char| c.is_digit(8))))
201 .map(|s: CompactString| (8u32, s)),
202 ),
203 ))
204}
205
206fn uint<I, T: FromStrRadix + Clone + Copy>() -> impl Parser<I, Output = T>
207where
208 I: RangeStream<Token = char>,
209 I::Error: ParseError<I::Token, I::Range, I::Position>,
210 I::Range: Range,
211{
212 choice((
213 radix_prefix(),
214 many1(digit()).map(|s: CompactString| (10u32, s)),
215 ))
216 .then(|(radix, digits): (u32, CompactString)| {
217 match T::from_str_radix(&digits, radix) {
218 Ok(i) => combine::value(i).right(),
219 Err(_) => unexpected_any("invalid unsigned integer").left(),
220 }
221 })
222}
223
224pub fn int<I, T: FromStrRadix + Clone + Copy>() -> impl Parser<I, Output = T>
225where
226 I: RangeStream<Token = char>,
227 I::Error: ParseError<I::Token, I::Range, I::Position>,
228 I::Range: Range,
229{
230 choice((
231 attempt(optional(token('-')).and(radix_prefix())).then(
232 |(sign, (radix, digits)): (Option<char>, (u32, CompactString))| {
233 let s = if sign.is_some() {
234 let mut s = CompactString::new("-");
235 s.push_str(&digits);
236 s
237 } else {
238 digits
239 };
240 match T::from_str_radix(&s, radix) {
241 Ok(i) => combine::value(i).right(),
242 Err(_) => unexpected_any("invalid signed integer").left(),
243 }
244 },
245 ),
246 recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
247 |s: CompactString| match T::from_str_radix(&s, 10) {
248 Ok(i) => combine::value(i).right(),
249 Err(_) => unexpected_any("invalid signed integer").left(),
250 },
251 ),
252 ))
253}
254
255fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
256where
257 I: RangeStream<Token = char>,
258 I::Error: ParseError<I::Token, I::Range, I::Position>,
259 I::Range: Range,
260{
261 choice((
262 attempt(recognize((
263 optional(token('-')),
264 take_while1(|c: char| c.is_digit(10)),
265 optional(token('.')),
266 take_while(|c: char| c.is_digit(10)),
267 token('e'),
268 optional(token('-')),
269 take_while1(|c: char| c.is_digit(10)),
270 ))),
271 attempt(recognize((
272 optional(token('-')),
273 take_while1(|c: char| c.is_digit(10)),
274 token('.'),
275 take_while(|c: char| c.is_digit(10)),
276 ))),
277 ))
278 .then(|s: CompactString| match s.parse::<T>() {
279 Ok(i) => combine::value(i).right(),
280 Err(_) => unexpected_any("invalid float").left(),
281 })
282}
283
284fn base64<I>() -> impl Parser<I, Output = LPooled<Vec<u8>>>
285where
286 I: RangeStream<Token = char>,
287 I::Error: ParseError<I::Token, I::Range, I::Position>,
288 I::Range: Range,
289{
290 recognize((
291 take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
292 take_while(|c: char| c == '='),
293 ))
294 .then(|s: LPooled<String>| {
295 let s = if &*s == "==" { LPooled::take() } else { s };
296 let mut buf: LPooled<Vec<u8>> = LPooled::take();
297 match BASE64.decode_vec(&*s, &mut buf) {
298 Ok(()) => combine::value(buf).right(),
299 Err(_) => unexpected_any("base64 decode failed").left(),
300 }
301 })
302}
303
304fn constant<I>(typ: &'static str) -> impl Parser<I, Output = ()>
305where
306 I: RangeStream<Token = char>,
307 I::Error: ParseError<I::Token, I::Range, I::Position>,
308 I::Range: Range,
309{
310 string(typ).with(spaces()).with(token(':')).with(spaces()).map(|_| ())
311}
312
313pub fn close_expr<I>() -> impl Parser<I, Output = ()>
314where
315 I: RangeStream<Token = char>,
316 I::Error: ParseError<I::Token, I::Range, I::Position>,
317 I::Range: Range,
318{
319 not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
320}
321
322fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
323where
324 I: RangeStream<Token = char>,
325 I::Error: ParseError<I::Token, I::Range, I::Position>,
326 I::Range: Range,
327{
328 spaces().with(choice((
329 choice((
330 attempt(constant("u8")).with(uint::<_, u8>().map(Value::U8)),
331 attempt(constant("u16")).with(uint::<_, u16>().map(Value::U16)),
332 attempt(constant("u32")).with(uint::<_, u32>().map(Value::U32)),
333 constant("u64").with(uint::<_, u64>().map(Value::U64)),
334 attempt(constant("i8")).with(int::<_, i8>().map(Value::I8)),
335 attempt(constant("i16")).with(int::<_, i16>().map(Value::I16)),
336 attempt(constant("i32")).with(int::<_, i32>().map(Value::I32)),
337 constant("i64").with(int::<_, i64>().map(Value::I64)),
338 attempt(constant("v32")).with(uint::<_, u32>().map(Value::V32)),
339 constant("v64").with(uint::<_, u64>().map(Value::V64)),
340 attempt(constant("z32")).with(int::<_, i32>().map(Value::Z32)),
341 constant("z64").with(int::<_, i64>().map(Value::Z64)),
342 attempt(constant("f32")).with(flt::<_, f32>().map(Value::F32)),
343 attempt(constant("f64")).with(flt::<_, f64>().map(Value::F64)),
344 )),
345 between(
346 token('['),
347 sptoken(']'),
348 sep_by_tok(value(must_escape, esc), csep(), token(']')),
349 )
350 .map(|mut vals: LPooled<Vec<Value>>| {
351 Value::Array(ValArray::from_iter_exact(vals.drain(..)))
352 }),
353 between(
354 token('{'),
355 sptoken('}'),
356 sep_by_tok(
357 (value(must_escape, esc), spstring("=>").with(value(must_escape, esc))),
358 csep(),
359 token('}'),
360 )
361 .map(|mut vals: LPooled<Vec<(Value, Value)>>| {
362 Value::Map(immutable_chunkmap::map::Map::from_iter(vals.drain(..)))
363 }),
364 ),
365 quoted(must_escape, esc).map(|s| Value::String(ArcStr::from(s))),
366 flt::<_, f64>().map(Value::F64),
367 int::<_, i64>().map(Value::I64),
368 attempt(string("true").skip(not_prefix())).map(|_| Value::Bool(true)),
369 attempt(string("false").skip(not_prefix())).map(|_| Value::Bool(false)),
370 attempt(string("null").skip(not_prefix())).map(|_| Value::Null),
371 constant("bytes")
372 .with(base64())
373 .map(|v| Value::Bytes(PBytes::new(Bytes::from(LPooled::detach(v))))),
374 constant("abstract").with(base64()).then(|v| {
375 match Abstract::decode(&mut &v[..]) {
376 Ok(a) => combine::value(Value::Abstract(a)).right(),
377 Err(_) => unexpected_any("failed to unpack abstract").left(),
378 }
379 }),
380 constant("error")
381 .with(value(must_escape, esc))
382 .map(|v| Value::Error(Arc::new(v))),
383 attempt(constant("decimal"))
384 .with(flt::<_, Decimal>())
385 .map(|d| Value::Decimal(Arc::new(d))),
386 attempt(constant("datetime"))
387 .with(from_str(quoted(must_escape, esc)))
388 .map(|d| Value::DateTime(Arc::new(d))),
389 constant("duration")
390 .with(flt::<_, f64>().and(choice((
391 string("ns"),
392 string("us"),
393 string("ms"),
394 string("s"),
395 ))))
396 .map(|(n, suffix)| {
397 let d = match suffix {
398 "ns" => Duration::from_secs_f64(n / 1e9),
399 "us" => Duration::from_secs_f64(n / 1e6),
400 "ms" => Duration::from_secs_f64(n / 1e3),
401 "s" => Duration::from_secs_f64(n),
402 _ => unreachable!(),
403 };
404 Value::Duration(Arc::new(d))
405 }),
406 )))
407}
408
// Public entry point for parsing a single `Value`; it simply delegates to
// `value_`. `value_` in turn calls `value` for nested arrays, maps and
// `error:` payloads, so the grammar is recursive. It is declared through
// combine's `parser!` macro rather than as a plain `impl Parser` function --
// presumably so the recursive parser has a nameable type (confirm against
// the combine documentation).
//
// `must_escape` lists the characters that must be escaped inside quoted
// strings and `esc` is the escaper used to unescape them; both are passed
// through unchanged.
parser! {
    pub fn value['a, I](
        must_escape: &'static [char],
        esc: &'a Escape
    )(I) -> Value
    where [I: RangeStream<Token = char>, I::Range: Range]
    {
        value_(must_escape, esc)
    }
}
419
420pub fn parse_value(s: &str) -> anyhow::Result<Value> {
421 value(&VAL_MUST_ESC, &VAL_ESC)
422 .skip(spaces())
423 .skip(eof())
424 .easy_parse(position::Stream::new(s))
425 .map(|(r, _)| r)
426 .map_err(|e| anyhow::anyhow!(format!("{}", e)))
427}
428
#[cfg(test)]
mod tests {
    use arcstr::literal;

    use crate::Map;

    use super::*;

    /// Assert that `input` parses successfully to exactly `expected`.
    fn check(expected: Value, input: &str) {
        assert_eq!(expected, parse_value(input).unwrap());
    }

    #[test]
    fn parse() {
        // Integers, tagged and untagged.
        check(Value::U32(23), "u32:23");
        check(Value::V32(42), "v32:42");
        check(Value::I32(-10), "i32:-10");
        check(Value::I32(12321), "i32:12321");
        check(Value::Z32(-99), "z32:-99");
        check(Value::U64(100), "u64:100");
        check(Value::V64(100), "v64:100");
        check(Value::I64(-100), "i64:-100");
        check(Value::I64(-100), "-100");
        check(Value::I64(100), "i64:100");
        check(Value::I64(100), "100");
        check(Value::Z64(-100), "z64:-100");
        check(Value::Z64(100), "z64:100");

        // Floats, tagged and untagged.
        check(Value::F32(3.1415), "f32:3.1415");
        check(Value::F32(675.6), "f32:675.6");
        check(Value::F32(42.3435), "f32:42.3435");
        check(Value::F32(1.123e9), "f32:1.123e9");
        check(Value::F32(1e9), "f32:1e9");
        check(Value::F32(21.2443e-6), "f32:21.2443e-6");
        check(Value::F32(3.), "f32:3.");
        check(Value::F64(3.1415), "f64:3.1415");
        check(Value::F64(3.1415), "3.1415");
        check(Value::F64(1.123e9), "1.123e9");
        check(Value::F64(1e9), "1e9");
        check(Value::F64(21.2443e-6), "21.2443e-6");
        check(Value::F64(3.), "f64:3.");
        check(Value::F64(3.), "3.");

        // Strings, including escaped quotes and the empty string.
        let coconuts = ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#);
        check(
            Value::String(coconuts),
            r#""I've got a lovely \"bunch\" of (coconuts)""#,
        );
        check(Value::String(ArcStr::new()), r#""""#);
        check(Value::String(ArcStr::from(r#"""#)), r#""\"""#);

        // Keywords.
        check(Value::Bool(true), "true");
        check(Value::Bool(true), "true ");
        check(Value::Bool(false), "false");
        check(Value::Null, "null");

        // Error wrapper.
        check(Value::error(literal!("error")), r#"error:"error""#);

        // Arrays, with and without a trailing comma.
        let arr = ValArray::from_iter_exact(
            [Value::I64(42), Value::String(literal!("hello world"))].into_iter(),
        );
        check(Value::Array(arr.clone()), r#"[42, "hello world", ]"#);
        check(Value::Array(arr), r#"[42, "hello world"]"#);

        // Maps, with and without a trailing comma.
        let map = Map::from_iter([
            (Value::I64(42), Value::String(literal!("hello world"))),
            (Value::String(literal!("hello world")), Value::I64(42)),
        ]);
        check(
            Value::Map(map.clone()),
            r#"{ 42 => "hello world", "hello world" => 42, }"#,
        );
        check(
            Value::Map(map.clone()),
            r#"{ 42 => "hello world", "hello world" => 42}"#,
        );

        // Hexadecimal literals.
        check(Value::U8(255), "u8:0xFF");
        check(Value::U8(255), "u8:0XFF");
        check(Value::I32(-31), "i32:-0x1F");
        check(Value::U64(0xDEAD), "u64:0xDEAD");
        check(Value::I64(255), "i64:0xFF");

        // Binary literals.
        check(Value::U16(10), "u16:0b1010");
        check(Value::U16(10), "u16:0B1010");
        check(Value::I8(-1), "i8:-0b1");

        // Octal literals.
        check(Value::U32(63), "u32:0o77");
        check(Value::U32(63), "u32:0O77");
        check(Value::I64(-8), "i64:-0o10");

        // Untagged radix literals default to i64.
        check(Value::I64(255), "0xFF");
        check(Value::I64(10), "0b1010");
        check(Value::I64(63), "0o77");
        check(Value::I64(-255), "-0xFF");
    }
}