1use alloc::borrow::Cow;
2use alloc::collections::BTreeMap;
3use alloc::string::String;
4use alloc::vec::Vec;
5use core::str::FromStr;
6
7use esexpr::ESExpr;
8use esexpr::cowstr::CowStr;
9use half::f16;
10use nom::branch::alt;
11use nom::bytes::complete::{escaped_transform, tag, tag_no_case, take_until, take_while, take_while_m_n, take_while1};
12use nom::character::complete::{alphanumeric1, bin_digit1, char, digit1, hex_digit1, multispace1, none_of, oct_digit1, one_of};
13use nom::combinator::{cut, eof, map, map_res, not, opt, peek, recognize, value};
14use nom::multi::{many0, many0_count};
15use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
16use nom::{IResult, Parser};
17use num_bigint::{BigInt, BigUint, Sign};
18
/// Kinds of lexical errors reported while reading the text format.
#[derive(Debug, Clone, PartialEq)]
pub enum LexErrorType {
	/// A token that was not expected.
	///
	/// NOTE(review): not constructed in the visible part of this file; meaning taken from the name.
	UnexpectedToken,

	/// A string literal without a closing quote.
	///
	/// NOTE(review): not constructed in the visible part of this file; meaning taken from the name.
	UnterminatedString,

	/// A quoted identifier without a closing quote.
	///
	/// NOTE(review): not constructed in the visible part of this file; meaning taken from the name.
	UnterminatedIdentifierString,

	/// A `\u{...}` escape whose value is not a valid `char`; carries the offending value.
	InvalidUnicodeCodePoint(u32),

	/// A NaN payload with bits set outside the mantissa field of the target float type;
	/// carries the offending payload.
	InvalidNaNPayload(u64),
}
37
38pub fn skip_ws(input: &str) -> IResult<&str, ()> {
43 value((), many0_count(alt((value((), multispace1), comment)))).parse(input)
44}
45
46fn comment(input: &str) -> IResult<&str, ()> {
47 value((), pair(tag("//"), take_until("\n"))).parse(input)
48}
49
/// Returns `true` for the ASCII lowercase letters `a` through `z`.
fn is_alpha(c: char) -> bool {
	matches!(c, 'a'..='z')
}
53
/// Returns `true` for ASCII lowercase letters and ASCII decimal digits.
fn is_alphanum(c: char) -> bool {
	matches!(c, 'a'..='z' | '0'..='9')
}
57
58pub fn simple_identifier(input: &str) -> IResult<&str, &str> {
63 preceded(
64 skip_ws,
65 recognize((
66 take_while1(is_alpha),
67 take_while(is_alphanum),
68 many0(pair(char('-'), take_while1(is_alphanum))),
69 )),
70 )
71 .parse(input)
72}
73
74fn identifier(input: &str) -> IResult<&str, String> {
75 alt((
76 map(simple_identifier, String::from),
77 preceded(skip_ws, string_impl('\'', "'\\")),
78 ))
79 .parse(input)
80}
81
82fn float_decimal(input: &str) -> IResult<&str, ESExpr<'static>> {
83 map(
84 recognize((
85 opt(one_of("+-")),
86 digit1,
87 char('.'),
88 cut(digit1),
89 opt((one_of("eE"), opt(one_of("+-")), digit1)),
90 opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
91 not(peek(alphanumeric1)),
92 )),
93 parse_dec_float,
94 )
95 .parse(input)
96}
97
98fn parse_dec_float(s: &str) -> ESExpr<'static> {
99 if s.ends_with("f16") || s.ends_with("F16") {
100 #[expect(
101 clippy::unwrap_used,
102 reason = "Shouldn't fail because the parser should ensure the format is valid."
103 )]
104 let f = s.trim_end_matches("f16")
105 .trim_end_matches("F16")
106 .parse::<f16>()
107 .unwrap();
108 ESExpr::Float16(f)
109 }
110 else if s.ends_with('f') || s.ends_with('F') {
111 #[expect(
112 clippy::unwrap_used,
113 reason = "Shouldn't fail because the parser should ensure the format is valid."
114 )]
115 let f = s.trim_end_matches('f').trim_end_matches('F').parse::<f32>().unwrap();
116 ESExpr::Float32(f)
117 }
118 else {
119 #[expect(
120 clippy::unwrap_used,
121 reason = "Shouldn't fail because the parser should ensure the format is valid."
122 )]
123 let d = s.trim_end_matches('f').trim_end_matches('F').parse::<f64>().unwrap();
124 ESExpr::Float64(d)
125 }
126}
127
128fn float_hex(input: &str) -> IResult<&str, ESExpr<'static>> {
129 map(
130 recognize((
131 opt(one_of("+-")),
132 tag_no_case("0x"),
133 hex_digit1,
134 char('.'),
135 hex_digit1,
136 cut(one_of("pP")),
137 opt(one_of("+-")),
138 digit1,
139 opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
140 not(peek(alphanumeric1)),
141 )),
142 parse_hex_float,
143 )
144 .parse(input)
145}
146
147fn parse_hex_float(s: &str) -> ESExpr<'static> {
148 if s.ends_with("f16") || s.ends_with("F16") {
149 #[expect(
150 clippy::unwrap_used,
151 reason = "Shouldn't fail because the parser should ensure the format is valid."
152 )]
153 let repr: hexponent::FloatLiteral = s
154 .trim_end_matches("f16")
155 .trim_end_matches("f16")
156 .parse::<hexponent::FloatLiteral>()
157 .unwrap();
158 let f = repr.convert().inner();
159
160 ESExpr::Float16(f16::from_f32(f))
161 }
162 else if s.ends_with('f') || s.ends_with('F') {
163 #[expect(
164 clippy::unwrap_used,
165 reason = "Shouldn't fail because the parser should ensure the format is valid."
166 )]
167 let repr: hexponent::FloatLiteral = s
168 .trim_end_matches('f')
169 .trim_end_matches('F')
170 .parse::<hexponent::FloatLiteral>()
171 .unwrap();
172 let f = repr.convert().inner();
173
174 ESExpr::Float32(f)
175 }
176 else {
177 #[expect(
178 clippy::unwrap_used,
179 reason = "Shouldn't fail because the parser should ensure the format is valid."
180 )]
181 let repr: hexponent::FloatLiteral = s
182 .trim_end_matches('d')
183 .trim_end_matches('D')
184 .parse::<hexponent::FloatLiteral>()
185 .unwrap();
186 let d = repr.convert().inner();
187 ESExpr::Float64(d)
188 }
189}
190
191fn float16_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
192 map_res(
193 (
194 tag("#float16:"),
195 opt(one_of("+-")),
196 tag("nan"),
197 opt(preceded(
198 tag(":"),
199 nom::character::complete::u16
200 ))
201 ),
202 |(_, sign, _, payload)| {
203 let is_neg = sign.is_some_and(|sign| sign == '-');
204
205 let Some(payload) = payload else {
206 if is_neg {
207 return Ok(ESExpr::Float16(-f16::NAN));
208 }
209 else {
210 return Ok(ESExpr::Float16(f16::NAN));
211 }
212 };
213
214 if (payload & 0xFC00) != 0 {
215 return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
216 }
217
218 let sign_bit: u16 = if is_neg { 0x8000 } else { 0 };
219 let exponent: u16 = 0x7C00;
220
221 let f = f16::from_bits(sign_bit | exponent | payload);
222
223 Ok(ESExpr::Float16(f))
224 }
225 ).parse(input)
226}
227
228fn float32_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
229 map_res(
230 (
231 tag("#float32:"),
232 opt(one_of("+-")),
233 tag("nan"),
234 opt(preceded(
235 tag(":"),
236 nom::character::complete::u32
237 ))
238 ),
239 |(_, sign, _, payload)| {
240 let is_neg = sign.is_some_and(|sign| sign == '-');
241
242 let Some(payload) = payload else {
243 if is_neg {
244 return Ok(ESExpr::Float32(-f32::NAN));
245 }
246 else {
247 return Ok(ESExpr::Float32(f32::NAN));
248 }
249 };
250
251 if (payload & 0xFF800000) != 0 {
252 return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
253 }
254
255 let sign_bit: u32 = if is_neg { 0x80000000 } else { 0 };
256 let exponent: u32 = 0x7F800000;
257
258 let f = f32::from_bits(sign_bit | exponent | payload);
259
260 Ok(ESExpr::Float32(f))
261 }
262 ).parse(input)
263}
264
265fn float64_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
266 map_res(
267 (
268 tag("#float64:"),
269 opt(one_of("+-")),
270 tag("nan"),
271 opt(preceded(
272 tag(":"),
273 nom::character::complete::u64
274 ))
275 ),
276 |(_, sign, _, payload)| {
277 let is_neg = sign.is_some_and(|sign| sign == '-');
278
279 let Some(payload) = payload else {
280 if is_neg {
281 return Ok(ESExpr::Float64(-f64::NAN));
282 }
283 else {
284 return Ok(ESExpr::Float64(f64::NAN));
285 }
286 };
287
288 if (payload & 0xFFF0000000000000) != 0 {
289 return Err(LexErrorType::InvalidNaNPayload(payload));
290 }
291
292 let sign_bit: u64 = if is_neg { 0x8000000000000000 } else { 0 };
293 let exponent: u64 = 0x7FF0000000000000;
294
295 let f = f64::from_bits(sign_bit | exponent | payload);
296
297 Ok(ESExpr::Float64(f))
298 }
299 ).parse(input)
300}
301
302fn float<'a>(input: &'a str) -> IResult<&'a str, ESExpr<'static>> {
303 preceded(
304 skip_ws,
305 alt((
306 float_decimal,
307 float_hex,
308 float16_nan,
309 atom(ESExpr::Float16(f16::INFINITY), "#float16:+inf"),
310 atom(ESExpr::Float16(f16::NEG_INFINITY), "#float16:-inf"),
311 float32_nan,
312 atom(ESExpr::Float32(f32::INFINITY), "#float32:+inf"),
313 atom(ESExpr::Float32(f32::NEG_INFINITY), "#float32:-inf"),
314 float64_nan,
315 atom(ESExpr::Float64(f64::INFINITY), "#float64:+inf"),
316 atom(ESExpr::Float64(f64::NEG_INFINITY), "#float64:-inf"),
317 )),
318 )
319 .parse(input)
320}
321
322pub fn integer(input: &str) -> IResult<&str, BigInt> {
324 preceded(
325 skip_ws,
326 map((
327 opt(one_of("+-")),
328 unsigned_integer
329 ), |(sign, n)| {
330 let sign = if sign.is_some_and(|s| s == '-') { Sign::Minus } else { Sign::Plus };
331 BigInt::from_biguint(sign, n)
332 }),
333 )
334 .parse(input)
335}
336
337pub fn unsigned_integer(input: &str) -> IResult<&str, BigUint> {
339 preceded(
340 skip_ws,
341 alt((
342 map(
343 preceded(tag_no_case("0x"), hex_digit1),
344 |s: &str| parse_int_base(s, 16),
345 ),
346 map(
347 preceded(tag_no_case("0b"), bin_digit1),
348 |s: &str| parse_int_base(s, 2),
349 ),
350 map(
351 preceded(tag("0o"), oct_digit1),
352 |s: &str| parse_int_base(s, 8),
353 ),
354 map(recognize(digit1), |s: &str| {
355 #[expect(
356 clippy::unwrap_used,
357 reason = "Shouldn't fail because the parser should ensure the format is valid."
358 )]
359 s.parse::<BigUint>().unwrap()
360 }),
361 )),
362 )
363 .parse(input)
364}
365
366fn parse_int_base(s: &str, radix: u32) -> BigUint {
367 let b: Vec<u8> = s
368 .chars()
369 .map(|c| {
370 #[expect(
371 clippy::unwrap_used,
372 reason = "Shouldn't fail because the parser should ensure the format is valid."
373 )]
374 #[expect(
375 clippy::cast_possible_truncation,
376 reason = "Shouldn't be out of range because it is a single digit"
377 )]
378 {
379 c.to_digit(radix).unwrap() as u8
380 }
381 })
382 .collect();
383
384 #[expect(
385 clippy::unwrap_used,
386 reason = "Shouldn't fail because the parser should ensure the format is valid."
387 )]
388 BigUint::from_radix_be(&b, radix).unwrap()
389}
390
391fn string(input: &str) -> IResult<&str, String> {
392 preceded(skip_ws, string_impl('"', "\"\\")).parse(input)
393}
394
395fn string_impl<'a>(
396 quote: char,
397 non_normal_chars: &'static str,
398) -> impl Parser<&'a str, Output = String, Error = nom::error::Error<&'a str>> {
399 move |input| {
400 delimited(
401 char(quote),
402 escaped_transform(
403 none_of(non_normal_chars),
404 '\\',
405 alt((
406 value('\x0C', char('f')),
407 value('\n', char('n')),
408 value('\r', char('r')),
409 value('\t', char('t')),
410 value('\\', char('\\')),
411 value('"', char('"')),
412 value('\'', char('\'')),
413 delimited(
414 tag("u{"),
415 map_res(hex_digit1, |codepoint| -> Result<core::primitive::char, LexErrorType> {
416 #[expect(
417 clippy::unwrap_used,
418 reason = "Shouldn't fail because the parser should ensure the format is valid."
419 )]
420 let codepoint = u32::from_str_radix(codepoint, 16).unwrap();
421 char::from_u32(codepoint).ok_or(LexErrorType::InvalidUnicodeCodePoint(codepoint))
422 }),
423 char('}'),
424 ),
425 )),
426 ),
427 char(quote),
428 )
429 .parse(input)
430 }
431}
432
433fn binary(input: &str) -> IResult<&str, ESExpr<'static>> {
434 alt((
435 map(
436 delimited(preceded(skip_ws, tag("#\"")), many0(hex_byte), cut(tag("\""))),
437 |b| ESExpr::Array8(Cow::Owned(b)),
438 ),
439 map(
440 delimited(
441 preceded(skip_ws, tag("#u8[")),
442 many0(map_res(preceded(skip_ws, integer), u8::try_from)),
443 preceded(skip_ws, cut(tag("]"))),
444 ),
445 |b| ESExpr::Array8(Cow::Owned(b)),
446 ),
447 map(
448 delimited(
449 preceded(skip_ws, tag("#u16[")),
450 many0(map_res(preceded(skip_ws, integer), u16::try_from)),
451 preceded(skip_ws, cut(tag("]"))),
452 ),
453 |b| ESExpr::Array16(Cow::Owned(b)),
454 ),
455 map(
456 delimited(
457 preceded(skip_ws, tag("#u32[")),
458 many0(map_res(preceded(skip_ws, integer), u32::try_from)),
459 preceded(skip_ws, cut(tag("]"))),
460 ),
461 |b| ESExpr::Array32(Cow::Owned(b)),
462 ),
463 map(
464 delimited(
465 preceded(skip_ws, tag("#u64[")),
466 many0(map_res(preceded(skip_ws, integer), u64::try_from)),
467 preceded(skip_ws, cut(tag("]"))),
468 ),
469 |b| ESExpr::Array64(Cow::Owned(b)),
470 ),
471 map(
472 delimited(
473 preceded(skip_ws, tag("#u128[")),
474 many0(map_res(preceded(skip_ws, integer), u128::try_from)),
475 preceded(skip_ws, cut(tag("]"))),
476 ),
477 |b| ESExpr::Array128(Cow::Owned(b)),
478 ),
479 ))
480 .parse(input)
481}
482
483fn hex_byte(input: &str) -> IResult<&str, u8> {
484 map(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |s| {
485 #[expect(
486 clippy::unwrap_used,
487 reason = "Shouldn't fail because the parser should ensure the format is valid."
488 )]
489 u8::from_str_radix(s, 16).unwrap()
490 })
491 .parse(input)
492}
493
494enum ConstructorArg {
495 Positional(ESExpr<'static>),
496 Keyword(String, ESExpr<'static>),
497}
498
499fn constructor(input: &str) -> IResult<&str, ESExpr<'static>> {
500 map(
501 delimited(
502 preceded(skip_ws, char('(')),
503 pair(identifier, many0(constructor_arg)),
504 preceded(skip_ws, char(')')),
505 ),
506 |(name, args)| build_constructor(name, args),
507 )
508 .parse(input)
509}
510
511fn build_constructor(name: String, ctor_args: Vec<ConstructorArg>) -> ESExpr<'static> {
512 let mut args = Vec::new();
513 let mut kwargs = BTreeMap::new();
514
515 for arg in ctor_args {
516 match arg {
517 ConstructorArg::Positional(value) => args.push(value),
518 ConstructorArg::Keyword(name, value) => {
519 kwargs.insert(CowStr::Owned(name), value);
520 },
521 }
522 }
523
524 ESExpr::constructor(name, args, kwargs)
525}
526
527fn constructor_arg(input: &str) -> IResult<&str, ConstructorArg> {
528 alt((
529 map(
530 separated_pair(preceded(skip_ws, identifier), preceded(skip_ws, char(':')), expr),
531 |(name, value)| ConstructorArg::Keyword(name, value),
532 ),
533 map(expr, ConstructorArg::Positional),
534 ))
535 .parse(input)
536}
537
538fn null_atom(input: &str) -> IResult<&str, ESExpr<'static>> {
539 map((skip_ws, tag("#null"), digit1, not(alphanumeric1)), |(_, _, n, _)| {
540 #[expect(
541 clippy::unwrap_used,
542 reason = "Shouldn't fail because the parser should ensure the format is valid."
543 )]
544 ESExpr::Null(Cow::Owned(BigUint::from_str(n).unwrap()))
545 })
546 .parse(input)
547}
548
549fn atom<'a>(
550 expr: ESExpr<'static>,
551 s: &'static str,
552) -> impl Parser<&'a str, Output = ESExpr<'static>, Error = nom::error::Error<&'a str>> {
553 move |input| value(expr.clone(), preceded(skip_ws, terminated(tag(s), not(alphanumeric1)))).parse(input)
554}
555
556pub fn expr(input: &str) -> IResult<&str, ESExpr<'static>> {
561 alt((
562 float,
563 map(integer, |i| ESExpr::Int(Cow::Owned(i))),
564 map(string, |s| ESExpr::Str(CowStr::Owned(s))),
565 binary,
566 atom(ESExpr::Bool(true), "#true"),
567 atom(ESExpr::Bool(false), "#false"),
568 null_atom,
569 atom(ESExpr::Null(Cow::Owned(BigUint::ZERO)), "#null"),
570 constructor,
571 ))
572 .parse(input)
573}
574
575pub(crate) fn expr_file(input: &str) -> IResult<&str, ESExpr<'static>> {
576 terminated(terminated(expr, skip_ws), eof).parse(input)
577}
578
579pub(crate) fn multi_expr_file(input: &str) -> IResult<&str, Vec<ESExpr<'static>>> {
580 terminated(terminated(many0(expr), skip_ws), eof).parse(input)
581}