1use crate::{pbuf::PBytes, Value};
2use arcstr::ArcStr;
3use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
4use bytes::Bytes;
5use combine::{
6 attempt, between, choice, from_str, many1, none_of, not_followed_by, one_of,
7 optional, parser,
8 parser::{
9 char::{alpha_num, digit, spaces, string},
10 combinator::recognize,
11 range::{take_while, take_while1},
12 repeat::escaped,
13 },
14 sep_by,
15 stream::{position, Range},
16 token, unexpected_any, EasyParser, ParseError, Parser, RangeStream,
17};
18use compact_str::CompactString;
19use escaping::Escape;
20use rust_decimal::Decimal;
21use std::{borrow::Cow, result::Result, str::FromStr, sync::LazyLock, time::Duration};
22
/// Predicate handed to the escaper: returns `true` for every control
/// character, `false` for anything else.
fn should_escape_generic(c: char) -> bool {
    char::is_control(c)
}
26
27pub const VAL_MUST_ESC: [char; 2] = ['\\', '"'];
28pub static VAL_ESC: LazyLock<Escape> = LazyLock::new(|| {
29 Escape::new(
30 '\\',
31 &['\\', '"', '\n', '\r', '\0', '\t'],
32 &[('\n', "n"), ('\r', "r"), ('\t', "t"), ('\0', "0")],
33 Some(should_escape_generic),
34 )
35 .unwrap()
36});
37
38pub fn escaped_string<I>(
39 must_esc: &'static [char],
40 esc: &Escape,
41) -> impl Parser<I, Output = String>
42where
43 I: RangeStream<Token = char>,
44 I::Error: ParseError<I::Token, I::Range, I::Position>,
45 I::Range: Range,
46{
47 recognize(escaped(
48 take_while1(move |c| !must_esc.contains(&c)),
49 esc.get_escape_char(),
50 one_of(
51 esc.get_tr()
52 .iter()
53 .filter_map(|(_, s)| s.chars().next())
54 .chain(must_esc.iter().copied()),
55 ),
56 ))
57 .map(|s| match esc.unescape(&s) {
58 Cow::Borrowed(_) => s, Cow::Owned(s) => s,
60 })
61}
62
63fn quoted<I>(
64 must_escape: &'static [char],
65 esc: &Escape,
66) -> impl Parser<I, Output = String>
67where
68 I: RangeStream<Token = char>,
69 I::Error: ParseError<I::Token, I::Range, I::Position>,
70 I::Range: Range,
71{
72 spaces().with(between(token('"'), token('"'), escaped_string(must_escape, esc)))
73}
74
75fn uint<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
76where
77 I: RangeStream<Token = char>,
78 I::Error: ParseError<I::Token, I::Range, I::Position>,
79 I::Range: Range,
80{
81 many1(digit()).then(|s: CompactString| match s.parse::<T>() {
82 Ok(i) => combine::value(i).right(),
83 Err(_) => unexpected_any("invalid unsigned integer").left(),
84 })
85}
86
87pub fn int<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
88where
89 I: RangeStream<Token = char>,
90 I::Error: ParseError<I::Token, I::Range, I::Position>,
91 I::Range: Range,
92{
93 recognize((optional(token('-')), take_while1(|c: char| c.is_digit(10)))).then(
94 |s: CompactString| match s.parse::<T>() {
95 Ok(i) => combine::value(i).right(),
96 Err(_) => unexpected_any("invalid signed integer").left(),
97 },
98 )
99}
100
101fn flt<I, T: FromStr + Clone + Copy>() -> impl Parser<I, Output = T>
102where
103 I: RangeStream<Token = char>,
104 I::Error: ParseError<I::Token, I::Range, I::Position>,
105 I::Range: Range,
106{
107 choice((
108 attempt(recognize((
109 optional(token('-')),
110 take_while1(|c: char| c.is_digit(10)),
111 optional(token('.')),
112 take_while(|c: char| c.is_digit(10)),
113 token('e'),
114 optional(token('-')),
115 take_while1(|c: char| c.is_digit(10)),
116 ))),
117 attempt(recognize((
118 optional(token('-')),
119 take_while1(|c: char| c.is_digit(10)),
120 token('.'),
121 take_while(|c: char| c.is_digit(10)),
122 ))),
123 ))
124 .then(|s: CompactString| match s.parse::<T>() {
125 Ok(i) => combine::value(i).right(),
126 Err(_) => unexpected_any("invalid float").left(),
127 })
128}
129
130fn dcml<I>() -> impl Parser<I, Output = Decimal>
131where
132 I: RangeStream<Token = char>,
133 I::Error: ParseError<I::Token, I::Range, I::Position>,
134 I::Range: Range,
135{
136 recognize((
137 optional(token('-')),
138 take_while1(|c: char| c.is_digit(10)),
139 optional(token('.')),
140 take_while(|c: char| c.is_digit(10)),
141 ))
142 .then(|s: CompactString| match s.parse::<Decimal>() {
143 Ok(i) => combine::value(i).right(),
144 Err(_) => unexpected_any("invalid decimal").left(),
145 })
146}
147
148struct Base64Encoded(Vec<u8>);
149
150impl FromStr for Base64Encoded {
151 type Err = base64::DecodeError;
152
153 fn from_str(s: &str) -> Result<Self, Self::Err> {
154 BASE64.decode(s).map(Base64Encoded)
155 }
156}
157
158fn base64str<I>() -> impl Parser<I, Output = String>
159where
160 I: RangeStream<Token = char>,
161 I::Error: ParseError<I::Token, I::Range, I::Position>,
162 I::Range: Range,
163{
164 recognize((
165 take_while(|c: char| c.is_ascii_alphanumeric() || c == '+' || c == '/'),
166 take_while(|c: char| c == '='),
167 ))
168}
169
170fn constant<I>(typ: &'static str) -> impl Parser<I, Output = char>
171where
172 I: RangeStream<Token = char>,
173 I::Error: ParseError<I::Token, I::Range, I::Position>,
174 I::Range: Range,
175{
176 string(typ).with(token(':'))
177}
178
179pub fn close_expr<I>() -> impl Parser<I, Output = ()>
180where
181 I: RangeStream<Token = char>,
182 I::Error: ParseError<I::Token, I::Range, I::Position>,
183 I::Range: Range,
184{
185 not_followed_by(none_of([' ', '\n', '\t', ';', ')', ',', ']', '}', '"']))
186}
187
188fn value_<I>(must_escape: &'static [char], esc: &Escape) -> impl Parser<I, Output = Value>
189where
190 I: RangeStream<Token = char>,
191 I::Error: ParseError<I::Token, I::Range, I::Position>,
192 I::Range: Range,
193{
194 spaces().with(choice((
195 attempt(
196 between(token('['), token(']'), sep_by(value(must_escape, esc), token(',')))
197 .map(|vals: Vec<Value>| Value::Array(vals.into())),
198 ),
199 attempt(quoted(must_escape, esc)).map(|s| Value::String(ArcStr::from(s))),
200 attempt(flt::<_, f64>()).map(Value::F64),
201 attempt(int::<_, i64>()).map(Value::I64),
202 attempt(
203 string("true").skip(not_followed_by(alpha_num())).map(|_| Value::Bool(true)),
204 ),
205 attempt(
206 string("false")
207 .skip(not_followed_by(alpha_num()))
208 .map(|_| Value::Bool(false)),
209 ),
210 attempt(string("null").skip(not_followed_by(alpha_num())).map(|_| Value::Null)),
211 attempt(constant("decimal").with(dcml()).map(Value::Decimal)),
212 attempt(constant("u32").with(uint::<_, u32>()).map(Value::U32)),
213 attempt(constant("v32").with(uint::<_, u32>()).map(Value::V32)),
214 attempt(constant("i32").with(int::<_, i32>()).map(Value::I32)),
215 attempt(constant("z32").with(int::<_, i32>()).map(Value::Z32)),
216 attempt(constant("u64").with(uint::<_, u64>()).map(Value::U64)),
217 attempt(constant("v64").with(uint::<_, u64>()).map(Value::V64)),
218 attempt(constant("i64").with(int::<_, i64>()).map(Value::I64)),
219 attempt(constant("z64").with(int::<_, i64>()).map(Value::Z64)),
220 attempt(constant("f32").with(flt::<_, f32>()).map(Value::F32)),
221 attempt(constant("f64").with(flt::<_, f64>()).map(Value::F64)),
222 attempt(
223 constant("bytes")
224 .with(from_str(base64str()))
225 .map(|Base64Encoded(v)| Value::Bytes(PBytes::new(Bytes::from(v)))),
226 ),
227 attempt(
228 constant("error")
229 .with(quoted(must_escape, esc))
230 .map(|s| Value::Error(ArcStr::from(s))),
231 ),
232 attempt(
233 constant("datetime")
234 .with(from_str(quoted(must_escape, esc)))
235 .map(|d| Value::DateTime(d)),
236 ),
237 attempt(
238 constant("duration")
239 .with(flt::<_, f64>().and(choice((
240 string("ns"),
241 string("us"),
242 string("ms"),
243 string("s"),
244 ))))
245 .map(|(n, suffix)| {
246 let d = match suffix {
247 "ns" => Duration::from_secs_f64(n / 1e9),
248 "us" => Duration::from_secs_f64(n / 1e6),
249 "ms" => Duration::from_secs_f64(n / 1e3),
250 "s" => Duration::from_secs_f64(n),
251 _ => unreachable!(),
252 };
253 Value::Duration(d)
254 }),
255 ),
256 )))
257}
258
259parser! {
260 pub fn value['a, I](
261 must_escape: &'static [char],
262 esc: &'a Escape
263 )(I) -> Value
264 where [I: RangeStream<Token = char>, I::Range: Range]
265 {
266 value_(must_escape, esc)
267 }
268}
269
270pub fn parse_value(s: &str) -> anyhow::Result<Value> {
271 value(&VAL_MUST_ESC, &VAL_ESC)
272 .easy_parse(position::Stream::new(s))
273 .map(|(r, _)| r)
274 .map_err(|e| anyhow::anyhow!(format!("{}", e)))
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` parses successfully to exactly `expected`.
    /// `#[track_caller]` makes a failure point at the calling line.
    #[track_caller]
    fn check(expected: Value, input: &str) {
        assert_eq!(expected, parse_value(input).unwrap());
    }

    #[test]
    fn parse() {
        // Typed integers.
        check(Value::U32(23), "u32:23");
        check(Value::V32(42), "v32:42");
        check(Value::I32(-10), "i32:-10");
        check(Value::I32(12321), "i32:12321");
        check(Value::Z32(-99), "z32:-99");
        check(Value::U64(100), "u64:100");
        check(Value::V64(100), "v64:100");
        check(Value::I64(-100), "i64:-100");
        check(Value::I64(-100), "-100");
        check(Value::I64(100), "i64:100");
        check(Value::I64(100), "100");
        check(Value::Z64(-100), "z64:-100");
        check(Value::Z64(100), "z64:100");

        // Floats, typed and bare.
        check(Value::F32(3.1415), "f32:3.1415");
        check(Value::F32(675.6), "f32:675.6");
        check(Value::F32(42.3435), "f32:42.3435");
        check(Value::F32(1.123e9), "f32:1.123e9");
        check(Value::F32(1e9), "f32:1e9");
        check(Value::F32(21.2443e-6), "f32:21.2443e-6");
        check(Value::F32(3.), "f32:3.");
        check(Value::F64(3.1415), "f64:3.1415");
        check(Value::F64(3.1415), "3.1415");
        check(Value::F64(1.123e9), "1.123e9");
        check(Value::F64(1e9), "1e9");
        check(Value::F64(21.2443e-6), "21.2443e-6");
        check(Value::F64(3.), "f64:3.");
        check(Value::F64(3.), "3.");

        // Quoted strings, including escaped quotes and the empty string.
        check(
            Value::String(ArcStr::from(r#"I've got a lovely "bunch" of (coconuts)"#)),
            r#""I've got a lovely \"bunch\" of (coconuts)""#,
        );
        check(Value::String(ArcStr::new()), r#""""#);
        check(Value::String(ArcStr::from(r#"""#)), r#""\"""#);

        // Keywords.
        check(Value::Bool(true), "true");
        check(Value::Bool(true), "true ");
        check(Value::Bool(false), "false");
        check(Value::Null, "null");

        // Error literal.
        check(Value::Error(ArcStr::from("error")), r#"error:"error""#);
    }
}