1use alloc::borrow::Cow;
2use alloc::collections::BTreeMap;
3use alloc::string::String;
4use alloc::vec::Vec;
5use core::str::FromStr;
6
7use esexpr::ESExpr;
8use esexpr::cowstr::CowStr;
9use half::f16;
10use nom::branch::alt;
11use nom::bytes::complete::{escaped_transform, tag, tag_no_case, take_until, take_while, take_while_m_n, take_while1};
12use nom::character::complete::{alphanumeric1, char, digit1, hex_digit1, multispace1, none_of, one_of};
13use nom::combinator::{cut, eof, map, map_res, not, opt, peek, recognize, value};
14use nom::multi::{many0, many0_count};
15use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
16use nom::{IResult, Parser};
17use num_bigint::{BigInt, BigUint, Sign};
18
/// Kinds of error that lexing ESExpr text can report.
#[derive(Debug, Clone, PartialEq)]
pub enum LexErrorType {
    /// A token appeared where it was not expected.
    /// NOTE(review): not constructed anywhere in the code visible here.
    UnexpectedToken,

    /// A double-quoted string literal was not closed.
    /// NOTE(review): not constructed anywhere in the code visible here.
    UnterminatedString,

    /// A single-quoted identifier string was not closed.
    /// NOTE(review): not constructed anywhere in the code visible here.
    UnterminatedIdentifierString,

    /// A `\u{...}` escape named a value that is not a valid Unicode scalar value.
    /// Carries the offending value.
    InvalidUnicodeCodePoint(u32),

    /// A `#floatN:nan:<payload>` literal carried a payload that the NaN literal
    /// parser rejected. Carries the payload widened to `u64`.
    InvalidNaNPayload(u64),
}
37
38pub fn skip_ws(input: &str) -> IResult<&str, ()> {
43 value((), many0_count(alt((value((), multispace1), comment)))).parse(input)
44}
45
46fn comment(input: &str) -> IResult<&str, ()> {
47 value((), pair(tag("//"), take_until("\n"))).parse(input)
48}
49
/// Returns `true` for the ASCII lowercase letters `a`..=`z` only.
fn is_alpha(c: char) -> bool {
    matches!(c, 'a'..='z')
}
53
/// Returns `true` for ASCII lowercase letters and ASCII decimal digits.
fn is_alphanum(c: char) -> bool {
    matches!(c, 'a'..='z' | '0'..='9')
}
57
58pub fn simple_identifier(input: &str) -> IResult<&str, &str> {
63 preceded(
64 skip_ws,
65 recognize((
66 take_while1(is_alpha),
67 take_while(is_alphanum),
68 many0(pair(char('-'), take_while1(is_alphanum))),
69 )),
70 )
71 .parse(input)
72}
73
74fn identifier(input: &str) -> IResult<&str, String> {
75 alt((
76 map(simple_identifier, String::from),
77 preceded(skip_ws, string_impl('\'', "'\\")),
78 ))
79 .parse(input)
80}
81
82fn float_decimal(input: &str) -> IResult<&str, ESExpr<'static>> {
83 map(
84 recognize((
85 opt(one_of("+-")),
86 digit1,
87 char('.'),
88 cut(digit1),
89 opt((one_of("eE"), opt(one_of("+-")), digit1)),
90 opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
91 not(peek(alphanumeric1)),
92 )),
93 parse_dec_float,
94 )
95 .parse(input)
96}
97
98fn parse_dec_float(s: &str) -> ESExpr<'static> {
99 if s.ends_with("f16") || s.ends_with("F16") {
100 #[expect(
101 clippy::unwrap_used,
102 reason = "Shouldn't fail because the parser should ensure the format is valid."
103 )]
104 let f = s.trim_end_matches("f16")
105 .trim_end_matches("F16")
106 .parse::<f16>()
107 .unwrap();
108 ESExpr::Float16(f)
109 }
110 else if s.ends_with('f') || s.ends_with('F') {
111 #[expect(
112 clippy::unwrap_used,
113 reason = "Shouldn't fail because the parser should ensure the format is valid."
114 )]
115 let f = s.trim_end_matches('f').trim_end_matches('F').parse::<f32>().unwrap();
116 ESExpr::Float32(f)
117 }
118 else {
119 #[expect(
120 clippy::unwrap_used,
121 reason = "Shouldn't fail because the parser should ensure the format is valid."
122 )]
123 let d = s.trim_end_matches('f').trim_end_matches('F').parse::<f64>().unwrap();
124 ESExpr::Float64(d)
125 }
126}
127
128fn float_hex(input: &str) -> IResult<&str, ESExpr<'static>> {
129 map(
130 recognize((
131 opt(one_of("+-")),
132 tag_no_case("0x"),
133 hex_digit1,
134 char('.'),
135 hex_digit1,
136 cut(one_of("pP")),
137 opt(one_of("+-")),
138 digit1,
139 opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
140 not(peek(alphanumeric1)),
141 )),
142 parse_hex_float,
143 )
144 .parse(input)
145}
146
147fn parse_hex_float(s: &str) -> ESExpr<'static> {
148 if s.ends_with("f16") || s.ends_with("F16") {
149 #[expect(
150 clippy::unwrap_used,
151 reason = "Shouldn't fail because the parser should ensure the format is valid."
152 )]
153 let repr: hexponent::FloatLiteral = s
154 .trim_end_matches("f16")
155 .trim_end_matches("f16")
156 .parse::<hexponent::FloatLiteral>()
157 .unwrap();
158 let f = repr.convert().inner();
159
160 ESExpr::Float16(f16::from_f32(f))
161 }
162 else if s.ends_with('f') || s.ends_with('F') {
163 #[expect(
164 clippy::unwrap_used,
165 reason = "Shouldn't fail because the parser should ensure the format is valid."
166 )]
167 let repr: hexponent::FloatLiteral = s
168 .trim_end_matches('f')
169 .trim_end_matches('F')
170 .parse::<hexponent::FloatLiteral>()
171 .unwrap();
172 let f = repr.convert().inner();
173
174 ESExpr::Float32(f)
175 }
176 else {
177 #[expect(
178 clippy::unwrap_used,
179 reason = "Shouldn't fail because the parser should ensure the format is valid."
180 )]
181 let repr: hexponent::FloatLiteral = s
182 .trim_end_matches('d')
183 .trim_end_matches('D')
184 .parse::<hexponent::FloatLiteral>()
185 .unwrap();
186 let d = repr.convert().inner();
187 ESExpr::Float64(d)
188 }
189}
190
191fn float16_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
192 map_res(
193 (
194 tag("#float16:"),
195 opt(one_of("+-")),
196 tag("nan"),
197 opt(preceded(
198 tag(":"),
199 nom::character::complete::u16
200 ))
201 ),
202 |(_, sign, _, payload)| {
203 let is_neg = sign.is_some_and(|sign| sign == '-');
204
205 let Some(payload) = payload else {
206 if is_neg {
207 return Ok(ESExpr::Float16(-f16::NAN));
208 }
209 else {
210 return Ok(ESExpr::Float16(f16::NAN));
211 }
212 };
213
214 if (payload & 0xFC00) != 0 {
215 return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
216 }
217
218 let sign_bit: u16 = if is_neg { 0x8000 } else { 0 };
219 let exponent: u16 = 0x7C00;
220
221 let f = f16::from_bits(sign_bit | exponent | payload);
222
223 Ok(ESExpr::Float16(f))
224 }
225 ).parse(input)
226}
227
228fn float32_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
229 map_res(
230 (
231 tag("#float32:"),
232 opt(one_of("+-")),
233 tag("nan"),
234 opt(preceded(
235 tag(":"),
236 nom::character::complete::u32
237 ))
238 ),
239 |(_, sign, _, payload)| {
240 let is_neg = sign.is_some_and(|sign| sign == '-');
241
242 let Some(payload) = payload else {
243 if is_neg {
244 return Ok(ESExpr::Float32(-f32::NAN));
245 }
246 else {
247 return Ok(ESExpr::Float32(f32::NAN));
248 }
249 };
250
251 if (payload & 0xFF800000) != 0 {
252 return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
253 }
254
255 let sign_bit: u32 = if is_neg { 0x80000000 } else { 0 };
256 let exponent: u32 = 0x7F800000;
257
258 let f = f32::from_bits(sign_bit | exponent | payload);
259
260 Ok(ESExpr::Float32(f))
261 }
262 ).parse(input)
263}
264
265fn float64_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
266 map_res(
267 (
268 tag("#float64:"),
269 opt(one_of("+-")),
270 tag("nan"),
271 opt(preceded(
272 tag(":"),
273 nom::character::complete::u64
274 ))
275 ),
276 |(_, sign, _, payload)| {
277 let is_neg = sign.is_some_and(|sign| sign == '-');
278
279 let Some(payload) = payload else {
280 if is_neg {
281 return Ok(ESExpr::Float64(-f64::NAN));
282 }
283 else {
284 return Ok(ESExpr::Float64(f64::NAN));
285 }
286 };
287
288 if (payload & 0xFFF0000000000000) != 0 {
289 return Err(LexErrorType::InvalidNaNPayload(payload));
290 }
291
292 let sign_bit: u64 = if is_neg { 0x8000000000000000 } else { 0 };
293 let exponent: u64 = 0x7FF0000000000000;
294
295 let f = f64::from_bits(sign_bit | exponent | payload);
296
297 Ok(ESExpr::Float64(f))
298 }
299 ).parse(input)
300}
301
302fn float<'a>(input: &'a str) -> IResult<&'a str, ESExpr<'static>> {
303 preceded(
304 skip_ws,
305 alt((
306 float_decimal,
307 float_hex,
308 float16_nan,
309 atom(ESExpr::Float16(f16::INFINITY), "#float16:+inf"),
310 atom(ESExpr::Float16(f16::NEG_INFINITY), "#float16:-inf"),
311 float32_nan,
312 atom(ESExpr::Float32(f32::INFINITY), "#float32:+inf"),
313 atom(ESExpr::Float32(f32::NEG_INFINITY), "#float32:-inf"),
314 float64_nan,
315 atom(ESExpr::Float64(f64::INFINITY), "#float64:+inf"),
316 atom(ESExpr::Float64(f64::NEG_INFINITY), "#float64:-inf"),
317 )),
318 )
319 .parse(input)
320}
321
322fn integer(input: &str) -> IResult<&str, BigInt> {
323 preceded(
324 skip_ws,
325 alt((
326 map(
327 recognize((opt(one_of("+-")), tag_no_case("0x"), hex_digit1)),
328 |s: &str| parse_int_base(s, 16),
329 ),
330 map(recognize((opt(one_of("+-")), digit1)), |s: &str| {
331 #[expect(
332 clippy::unwrap_used,
333 reason = "Shouldn't fail because the parser should ensure the format is valid."
334 )]
335 s.parse::<BigInt>().unwrap()
336 }),
337 )),
338 )
339 .parse(input)
340}
341
342fn parse_int_base(s: &str, radix: u32) -> BigInt {
343 let sign = if s.starts_with('-') { Sign::Minus } else { Sign::Plus };
344
345 let s = s
346 .trim_start_matches('+')
347 .trim_start_matches('-')
348 .trim_start_matches("0x")
349 .trim_start_matches("0X");
350
351 let b: Vec<u8> = s
352 .chars()
353 .map(|c| {
354 #[expect(
355 clippy::unwrap_used,
356 reason = "Shouldn't fail because the parser should ensure the format is valid."
357 )]
358 #[expect(
359 clippy::cast_possible_truncation,
360 reason = "Shouldn't be out of range because it is a single digit"
361 )]
362 {
363 c.to_digit(radix).unwrap() as u8
364 }
365 })
366 .collect();
367
368 #[expect(
369 clippy::unwrap_used,
370 reason = "Shouldn't fail because the parser should ensure the format is valid."
371 )]
372 BigInt::from_radix_be(sign, &b, radix).unwrap()
373}
374
375fn string(input: &str) -> IResult<&str, String> {
376 preceded(skip_ws, string_impl('"', "\"\\")).parse(input)
377}
378
379fn string_impl<'a>(
380 quote: char,
381 non_normal_chars: &'static str,
382) -> impl Parser<&'a str, Output = String, Error = nom::error::Error<&'a str>> {
383 move |input| {
384 delimited(
385 char(quote),
386 escaped_transform(
387 none_of(non_normal_chars),
388 '\\',
389 alt((
390 value('\x0C', char('f')),
391 value('\n', char('n')),
392 value('\r', char('r')),
393 value('\t', char('t')),
394 value('\\', char('\\')),
395 value('"', char('"')),
396 value('\'', char('\'')),
397 delimited(
398 tag("u{"),
399 map_res(hex_digit1, |codepoint| -> Result<core::primitive::char, LexErrorType> {
400 #[expect(
401 clippy::unwrap_used,
402 reason = "Shouldn't fail because the parser should ensure the format is valid."
403 )]
404 let codepoint = u32::from_str_radix(codepoint, 16).unwrap();
405 char::from_u32(codepoint).ok_or(LexErrorType::InvalidUnicodeCodePoint(codepoint))
406 }),
407 char('}'),
408 ),
409 )),
410 ),
411 char(quote),
412 )
413 .parse(input)
414 }
415}
416
417fn binary(input: &str) -> IResult<&str, ESExpr<'static>> {
418 alt((
419 map(
420 delimited(preceded(skip_ws, tag("#\"")), many0(hex_byte), cut(tag("\""))),
421 |b| ESExpr::Array8(Cow::Owned(b)),
422 ),
423 map(
424 delimited(
425 preceded(skip_ws, tag("#u8[")),
426 many0(map_res(preceded(skip_ws, integer), u8::try_from)),
427 preceded(skip_ws, cut(tag("]"))),
428 ),
429 |b| ESExpr::Array8(Cow::Owned(b)),
430 ),
431 map(
432 delimited(
433 preceded(skip_ws, tag("#u16[")),
434 many0(map_res(preceded(skip_ws, integer), u16::try_from)),
435 preceded(skip_ws, cut(tag("]"))),
436 ),
437 |b| ESExpr::Array16(Cow::Owned(b)),
438 ),
439 map(
440 delimited(
441 preceded(skip_ws, tag("#u32[")),
442 many0(map_res(preceded(skip_ws, integer), u32::try_from)),
443 preceded(skip_ws, cut(tag("]"))),
444 ),
445 |b| ESExpr::Array32(Cow::Owned(b)),
446 ),
447 map(
448 delimited(
449 preceded(skip_ws, tag("#u64[")),
450 many0(map_res(preceded(skip_ws, integer), u64::try_from)),
451 preceded(skip_ws, cut(tag("]"))),
452 ),
453 |b| ESExpr::Array64(Cow::Owned(b)),
454 ),
455 map(
456 delimited(
457 preceded(skip_ws, tag("#u128[")),
458 many0(map_res(preceded(skip_ws, integer), u128::try_from)),
459 preceded(skip_ws, cut(tag("]"))),
460 ),
461 |b| ESExpr::Array128(Cow::Owned(b)),
462 ),
463 ))
464 .parse(input)
465}
466
467fn hex_byte(input: &str) -> IResult<&str, u8> {
468 map(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |s| {
469 #[expect(
470 clippy::unwrap_used,
471 reason = "Shouldn't fail because the parser should ensure the format is valid."
472 )]
473 u8::from_str_radix(s, 16).unwrap()
474 })
475 .parse(input)
476}
477
/// One argument parsed inside a constructor expression.
enum ConstructorArg {
    /// An argument given by position: just a value.
    Positional(ESExpr<'static>),
    /// An argument given as `name: value`.
    Keyword(String, ESExpr<'static>),
}
482
483fn constructor(input: &str) -> IResult<&str, ESExpr<'static>> {
484 map(
485 delimited(
486 preceded(skip_ws, char('(')),
487 pair(identifier, many0(constructor_arg)),
488 preceded(skip_ws, char(')')),
489 ),
490 |(name, args)| build_constructor(name, args),
491 )
492 .parse(input)
493}
494
495fn build_constructor(name: String, ctor_args: Vec<ConstructorArg>) -> ESExpr<'static> {
496 let mut args = Vec::new();
497 let mut kwargs = BTreeMap::new();
498
499 for arg in ctor_args {
500 match arg {
501 ConstructorArg::Positional(value) => args.push(value),
502 ConstructorArg::Keyword(name, value) => {
503 kwargs.insert(CowStr::Owned(name), value);
504 },
505 }
506 }
507
508 ESExpr::constructor(name, args, kwargs)
509}
510
511fn constructor_arg(input: &str) -> IResult<&str, ConstructorArg> {
512 alt((
513 map(
514 separated_pair(preceded(skip_ws, identifier), preceded(skip_ws, char(':')), expr),
515 |(name, value)| ConstructorArg::Keyword(name, value),
516 ),
517 map(expr, ConstructorArg::Positional),
518 ))
519 .parse(input)
520}
521
522fn null_atom(input: &str) -> IResult<&str, ESExpr<'static>> {
523 map((skip_ws, tag("#null"), digit1, not(alphanumeric1)), |(_, _, n, _)| {
524 #[expect(
525 clippy::unwrap_used,
526 reason = "Shouldn't fail because the parser should ensure the format is valid."
527 )]
528 ESExpr::Null(Cow::Owned(BigUint::from_str(n).unwrap()))
529 })
530 .parse(input)
531}
532
533fn atom<'a>(
534 expr: ESExpr<'static>,
535 s: &'static str,
536) -> impl Parser<&'a str, Output = ESExpr<'static>, Error = nom::error::Error<&'a str>> {
537 move |input| value(expr.clone(), preceded(skip_ws, terminated(tag(s), not(alphanumeric1)))).parse(input)
538}
539
540pub fn expr(input: &str) -> IResult<&str, ESExpr<'static>> {
545 alt((
546 float,
547 map(integer, |i| ESExpr::Int(Cow::Owned(i))),
548 map(string, |s| ESExpr::Str(CowStr::Owned(s))),
549 binary,
550 atom(ESExpr::Bool(true), "#true"),
551 atom(ESExpr::Bool(false), "#false"),
552 null_atom,
553 atom(ESExpr::Null(Cow::Owned(BigUint::ZERO)), "#null"),
554 constructor,
555 ))
556 .parse(input)
557}
558
559pub(crate) fn expr_file(input: &str) -> IResult<&str, ESExpr<'static>> {
560 terminated(terminated(expr, skip_ws), eof).parse(input)
561}
562
563pub(crate) fn multi_expr_file(input: &str) -> IResult<&str, Vec<ESExpr<'static>>> {
564 terminated(terminated(many0(expr), skip_ws), eof).parse(input)
565}