esexpr_text/
parser.rs

1use alloc::borrow::Cow;
2use alloc::collections::BTreeMap;
3use alloc::string::String;
4use alloc::vec::Vec;
5use core::str::FromStr;
6
7use esexpr::ESExpr;
8use esexpr::cowstr::CowStr;
9use half::f16;
10use nom::branch::alt;
11use nom::bytes::complete::{escaped_transform, tag, tag_no_case, take_until, take_while, take_while_m_n, take_while1};
12use nom::character::complete::{alphanumeric1, char, digit1, hex_digit1, multispace1, none_of, one_of};
13use nom::combinator::{cut, eof, map, map_res, not, opt, peek, recognize, value};
14use nom::multi::{many0, many0_count};
15use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
16use nom::{IResult, Parser};
17use num_bigint::{BigInt, BigUint, Sign};
18
/// Represents a lexer error.
///
/// All payloads are plain integers, so the type supports total equality
/// (`Eq`) in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexErrorType {
	/// Unexpected token.
	UnexpectedToken,

	/// Unterminated string.
	UnterminatedString,

	/// Unterminated identifier string.
	UnterminatedIdentifierString,

	/// Invalid unicode codepoint.
	///
	/// Carries the numeric value that could not be converted to a `char`.
	InvalidUnicodeCodePoint(u32),

	/// Invalid NaN payload bits.
	///
	/// Carries the rejected payload, widened to `u64`.
	InvalidNaNPayload(u64),
}
37
38/// Parser that skips whitespace and comments.
39///
40/// # Errors
41/// Returns `Err` when parsing fails.
42pub fn skip_ws(input: &str) -> IResult<&str, ()> {
43	value((), many0_count(alt((value((), multispace1), comment)))).parse(input)
44}
45
46fn comment(input: &str) -> IResult<&str, ()> {
47	value((), pair(tag("//"), take_until("\n"))).parse(input)
48}
49
/// Returns `true` when `c` is an ASCII lowercase letter (`'a'..='z'`).
fn is_alpha(c: char) -> bool {
	matches!(c, 'a'..='z')
}
53
/// Returns `true` when `c` is an ASCII lowercase letter or an ASCII digit.
fn is_alphanum(c: char) -> bool {
	matches!(c, 'a'..='z' | '0'..='9')
}
57
58/// Parser for a simple identifier.
59///
60/// # Errors
61/// Returns `Err` when parsing fails.
62pub fn simple_identifier(input: &str) -> IResult<&str, &str> {
63	preceded(
64		skip_ws,
65		recognize((
66			take_while1(is_alpha),
67			take_while(is_alphanum),
68			many0(pair(char('-'), take_while1(is_alphanum))),
69		)),
70	)
71	.parse(input)
72}
73
74fn identifier(input: &str) -> IResult<&str, String> {
75	alt((
76		map(simple_identifier, String::from),
77		preceded(skip_ws, string_impl('\'', "'\\")),
78	))
79	.parse(input)
80}
81
82fn float_decimal(input: &str) -> IResult<&str, ESExpr<'static>> {
83	map(
84		recognize((
85			opt(one_of("+-")),
86			digit1,
87			char('.'),
88			cut(digit1),
89			opt((one_of("eE"), opt(one_of("+-")), digit1)),
90			opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
91			not(peek(alphanumeric1)),
92		)),
93		parse_dec_float,
94	)
95	.parse(input)
96}
97
98fn parse_dec_float(s: &str) -> ESExpr<'static> {
99	if s.ends_with("f16") || s.ends_with("F16") {
100		#[expect(
101			clippy::unwrap_used,
102			reason = "Shouldn't fail because the parser should ensure the format is valid."
103		)]
104		let f = s.trim_end_matches("f16")
105			.trim_end_matches("F16")
106			.parse::<f16>()
107			.unwrap();
108		ESExpr::Float16(f)
109	}
110	else if s.ends_with('f') || s.ends_with('F') {
111		#[expect(
112			clippy::unwrap_used,
113			reason = "Shouldn't fail because the parser should ensure the format is valid."
114		)]
115		let f = s.trim_end_matches('f').trim_end_matches('F').parse::<f32>().unwrap();
116		ESExpr::Float32(f)
117	}
118	else {
119		#[expect(
120			clippy::unwrap_used,
121			reason = "Shouldn't fail because the parser should ensure the format is valid."
122		)]
123		let d = s.trim_end_matches('f').trim_end_matches('F').parse::<f64>().unwrap();
124		ESExpr::Float64(d)
125	}
126}
127
128fn float_hex(input: &str) -> IResult<&str, ESExpr<'static>> {
129	map(
130		recognize((
131			opt(one_of("+-")),
132			tag_no_case("0x"),
133			hex_digit1,
134			char('.'),
135			hex_digit1,
136			cut(one_of("pP")),
137			opt(one_of("+-")),
138			digit1,
139			opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
140			not(peek(alphanumeric1)),
141		)),
142		parse_hex_float,
143	)
144	.parse(input)
145}
146
147fn parse_hex_float(s: &str) -> ESExpr<'static> {
148	if s.ends_with("f16") || s.ends_with("F16") {
149		#[expect(
150			clippy::unwrap_used,
151			reason = "Shouldn't fail because the parser should ensure the format is valid."
152		)]
153		let repr: hexponent::FloatLiteral = s
154			.trim_end_matches("f16")
155			.trim_end_matches("f16")
156			.parse::<hexponent::FloatLiteral>()
157			.unwrap();
158		let f = repr.convert().inner();
159
160		ESExpr::Float16(f16::from_f32(f))
161	}
162	else if s.ends_with('f') || s.ends_with('F') {
163		#[expect(
164			clippy::unwrap_used,
165			reason = "Shouldn't fail because the parser should ensure the format is valid."
166		)]
167		let repr: hexponent::FloatLiteral = s
168			.trim_end_matches('f')
169			.trim_end_matches('F')
170			.parse::<hexponent::FloatLiteral>()
171			.unwrap();
172		let f = repr.convert().inner();
173
174		ESExpr::Float32(f)
175	}
176	else {
177		#[expect(
178			clippy::unwrap_used,
179			reason = "Shouldn't fail because the parser should ensure the format is valid."
180		)]
181		let repr: hexponent::FloatLiteral = s
182			.trim_end_matches('d')
183			.trim_end_matches('D')
184			.parse::<hexponent::FloatLiteral>()
185			.unwrap();
186		let d = repr.convert().inner();
187		ESExpr::Float64(d)
188	}
189}
190
191fn float16_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
192	map_res(
193		(
194			tag("#float16:"),
195			opt(one_of("+-")),
196			tag("nan"),
197			opt(preceded(
198				tag(":"),
199				nom::character::complete::u16
200			))
201		),
202		|(_, sign, _, payload)| {
203			let is_neg = sign.is_some_and(|sign| sign == '-');
204
205			let Some(payload) = payload else {
206				if is_neg {
207					return Ok(ESExpr::Float16(-f16::NAN));
208				}
209				else {
210					return Ok(ESExpr::Float16(f16::NAN));
211				}
212			};
213
214			if (payload & 0xFC00) != 0 {
215				return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
216			}
217
218			let sign_bit: u16 = if is_neg { 0x8000 } else { 0 };
219			let exponent: u16 = 0x7C00;
220
221			let f = f16::from_bits(sign_bit | exponent | payload);
222
223			Ok(ESExpr::Float16(f))
224		}
225	).parse(input)
226}
227
228fn float32_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
229	map_res(
230		(
231			tag("#float32:"),
232			opt(one_of("+-")),
233			tag("nan"),
234			opt(preceded(
235				tag(":"),
236				nom::character::complete::u32
237			))
238		),
239		|(_, sign, _, payload)| {
240			let is_neg = sign.is_some_and(|sign| sign == '-');
241
242			let Some(payload) = payload else {
243				if is_neg {
244					return Ok(ESExpr::Float32(-f32::NAN));
245				}
246				else {
247					return Ok(ESExpr::Float32(f32::NAN));
248				}
249			};
250
251			if (payload & 0xFF800000) != 0 {
252				return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
253			}
254
255			let sign_bit: u32 = if is_neg { 0x80000000 } else { 0 };
256			let exponent: u32 = 0x7F800000;
257
258			let f = f32::from_bits(sign_bit | exponent | payload);
259
260			Ok(ESExpr::Float32(f))
261		}
262	).parse(input)
263}
264
265fn float64_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
266	map_res(
267		(
268			tag("#float64:"),
269			opt(one_of("+-")),
270			tag("nan"),
271			opt(preceded(
272				tag(":"),
273				nom::character::complete::u64
274			))
275		),
276		|(_, sign, _, payload)| {
277			let is_neg = sign.is_some_and(|sign| sign == '-');
278
279			let Some(payload) = payload else {
280				if is_neg {
281					return Ok(ESExpr::Float64(-f64::NAN));
282				}
283				else {
284					return Ok(ESExpr::Float64(f64::NAN));
285				}
286			};
287
288			if (payload & 0xFFF0000000000000) != 0 {
289				return Err(LexErrorType::InvalidNaNPayload(payload));
290			}
291
292			let sign_bit: u64 = if is_neg { 0x8000000000000000 } else { 0 };
293			let exponent: u64 = 0x7FF0000000000000;
294
295			let f = f64::from_bits(sign_bit | exponent | payload);
296
297			Ok(ESExpr::Float64(f))
298		}
299	).parse(input)
300}
301
302fn float<'a>(input: &'a str) -> IResult<&'a str, ESExpr<'static>> {
303	preceded(
304		skip_ws,
305		alt((
306			float_decimal,
307			float_hex,
308			float16_nan,
309			atom(ESExpr::Float16(f16::INFINITY), "#float16:+inf"),
310			atom(ESExpr::Float16(f16::NEG_INFINITY), "#float16:-inf"),
311			float32_nan,
312			atom(ESExpr::Float32(f32::INFINITY), "#float32:+inf"),
313			atom(ESExpr::Float32(f32::NEG_INFINITY), "#float32:-inf"),
314			float64_nan,
315			atom(ESExpr::Float64(f64::INFINITY), "#float64:+inf"),
316			atom(ESExpr::Float64(f64::NEG_INFINITY), "#float64:-inf"),
317		)),
318	)
319	.parse(input)
320}
321
322fn integer(input: &str) -> IResult<&str, BigInt> {
323	preceded(
324		skip_ws,
325		alt((
326			map(
327				recognize((opt(one_of("+-")), tag_no_case("0x"), hex_digit1)),
328				|s: &str| parse_int_base(s, 16),
329			),
330			map(recognize((opt(one_of("+-")), digit1)), |s: &str| {
331				#[expect(
332					clippy::unwrap_used,
333					reason = "Shouldn't fail because the parser should ensure the format is valid."
334				)]
335				s.parse::<BigInt>().unwrap()
336			}),
337		)),
338	)
339	.parse(input)
340}
341
342fn parse_int_base(s: &str, radix: u32) -> BigInt {
343	let sign = if s.starts_with('-') { Sign::Minus } else { Sign::Plus };
344
345	let s = s
346		.trim_start_matches('+')
347		.trim_start_matches('-')
348		.trim_start_matches("0x")
349		.trim_start_matches("0X");
350
351	let b: Vec<u8> = s
352		.chars()
353		.map(|c| {
354			#[expect(
355				clippy::unwrap_used,
356				reason = "Shouldn't fail because the parser should ensure the format is valid."
357			)]
358			#[expect(
359				clippy::cast_possible_truncation,
360				reason = "Shouldn't be out of range because it is a single digit"
361			)]
362			{
363				c.to_digit(radix).unwrap() as u8
364			}
365		})
366		.collect();
367
368	#[expect(
369		clippy::unwrap_used,
370		reason = "Shouldn't fail because the parser should ensure the format is valid."
371	)]
372	BigInt::from_radix_be(sign, &b, radix).unwrap()
373}
374
375fn string(input: &str) -> IResult<&str, String> {
376	preceded(skip_ws, string_impl('"', "\"\\")).parse(input)
377}
378
379fn string_impl<'a>(
380	quote: char,
381	non_normal_chars: &'static str,
382) -> impl Parser<&'a str, Output = String, Error = nom::error::Error<&'a str>> {
383	move |input| {
384		delimited(
385			char(quote),
386			escaped_transform(
387				none_of(non_normal_chars),
388				'\\',
389				alt((
390					value('\x0C', char('f')),
391					value('\n', char('n')),
392					value('\r', char('r')),
393					value('\t', char('t')),
394					value('\\', char('\\')),
395					value('"', char('"')),
396					value('\'', char('\'')),
397					delimited(
398						tag("u{"),
399						map_res(hex_digit1, |codepoint| -> Result<core::primitive::char, LexErrorType> {
400							#[expect(
401								clippy::unwrap_used,
402								reason = "Shouldn't fail because the parser should ensure the format is valid."
403							)]
404							let codepoint = u32::from_str_radix(codepoint, 16).unwrap();
405							char::from_u32(codepoint).ok_or(LexErrorType::InvalidUnicodeCodePoint(codepoint))
406						}),
407						char('}'),
408					),
409				)),
410			),
411			char(quote),
412		)
413		.parse(input)
414	}
415}
416
417fn binary(input: &str) -> IResult<&str, ESExpr<'static>> {
418	alt((
419		map(
420			delimited(preceded(skip_ws, tag("#\"")), many0(hex_byte), cut(tag("\""))),
421			|b| ESExpr::Array8(Cow::Owned(b)),
422		),
423		map(
424			delimited(
425				preceded(skip_ws, tag("#u8[")),
426				many0(map_res(preceded(skip_ws, integer), u8::try_from)),
427				preceded(skip_ws, cut(tag("]"))),
428			),
429			|b| ESExpr::Array8(Cow::Owned(b)),
430		),
431		map(
432			delimited(
433				preceded(skip_ws, tag("#u16[")),
434				many0(map_res(preceded(skip_ws, integer), u16::try_from)),
435				preceded(skip_ws, cut(tag("]"))),
436			),
437			|b| ESExpr::Array16(Cow::Owned(b)),
438		),
439		map(
440			delimited(
441				preceded(skip_ws, tag("#u32[")),
442				many0(map_res(preceded(skip_ws, integer), u32::try_from)),
443				preceded(skip_ws, cut(tag("]"))),
444			),
445			|b| ESExpr::Array32(Cow::Owned(b)),
446		),
447		map(
448			delimited(
449				preceded(skip_ws, tag("#u64[")),
450				many0(map_res(preceded(skip_ws, integer), u64::try_from)),
451				preceded(skip_ws, cut(tag("]"))),
452			),
453			|b| ESExpr::Array64(Cow::Owned(b)),
454		),
455		map(
456			delimited(
457				preceded(skip_ws, tag("#u128[")),
458				many0(map_res(preceded(skip_ws, integer), u128::try_from)),
459				preceded(skip_ws, cut(tag("]"))),
460			),
461			|b| ESExpr::Array128(Cow::Owned(b)),
462		),
463	))
464	.parse(input)
465}
466
467fn hex_byte(input: &str) -> IResult<&str, u8> {
468	map(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |s| {
469		#[expect(
470			clippy::unwrap_used,
471			reason = "Shouldn't fail because the parser should ensure the format is valid."
472		)]
473		u8::from_str_radix(s, 16).unwrap()
474	})
475	.parse(input)
476}
477
/// A single argument parsed inside a constructor expression.
enum ConstructorArg {
	/// An argument given by position: just a value.
	Positional(ESExpr<'static>),
	/// An argument given as `name: value`; holds the name and the value.
	Keyword(String, ESExpr<'static>),
}
482
483fn constructor(input: &str) -> IResult<&str, ESExpr<'static>> {
484	map(
485		delimited(
486			preceded(skip_ws, char('(')),
487			pair(identifier, many0(constructor_arg)),
488			preceded(skip_ws, char(')')),
489		),
490		|(name, args)| build_constructor(name, args),
491	)
492	.parse(input)
493}
494
495fn build_constructor(name: String, ctor_args: Vec<ConstructorArg>) -> ESExpr<'static> {
496	let mut args = Vec::new();
497	let mut kwargs = BTreeMap::new();
498
499	for arg in ctor_args {
500		match arg {
501			ConstructorArg::Positional(value) => args.push(value),
502			ConstructorArg::Keyword(name, value) => {
503				kwargs.insert(CowStr::Owned(name), value);
504			},
505		}
506	}
507
508	ESExpr::constructor(name, args, kwargs)
509}
510
511fn constructor_arg(input: &str) -> IResult<&str, ConstructorArg> {
512	alt((
513		map(
514			separated_pair(preceded(skip_ws, identifier), preceded(skip_ws, char(':')), expr),
515			|(name, value)| ConstructorArg::Keyword(name, value),
516		),
517		map(expr, ConstructorArg::Positional),
518	))
519	.parse(input)
520}
521
522fn null_atom(input: &str) -> IResult<&str, ESExpr<'static>> {
523	map((skip_ws, tag("#null"), digit1, not(alphanumeric1)), |(_, _, n, _)| {
524		#[expect(
525			clippy::unwrap_used,
526			reason = "Shouldn't fail because the parser should ensure the format is valid."
527		)]
528		ESExpr::Null(Cow::Owned(BigUint::from_str(n).unwrap()))
529	})
530	.parse(input)
531}
532
533fn atom<'a>(
534	expr: ESExpr<'static>,
535	s: &'static str,
536) -> impl Parser<&'a str, Output = ESExpr<'static>, Error = nom::error::Error<&'a str>> {
537	move |input| value(expr.clone(), preceded(skip_ws, terminated(tag(s), not(alphanumeric1)))).parse(input)
538}
539
540/// Parser for an `ESExpr` expression.
541///
542/// # Errors
543/// Returns `Err` when parsing fails.
544pub fn expr(input: &str) -> IResult<&str, ESExpr<'static>> {
545	alt((
546		float,
547		map(integer, |i| ESExpr::Int(Cow::Owned(i))),
548		map(string, |s| ESExpr::Str(CowStr::Owned(s))),
549		binary,
550		atom(ESExpr::Bool(true), "#true"),
551		atom(ESExpr::Bool(false), "#false"),
552		null_atom,
553		atom(ESExpr::Null(Cow::Owned(BigUint::ZERO)), "#null"),
554		constructor,
555	))
556	.parse(input)
557}
558
559pub(crate) fn expr_file(input: &str) -> IResult<&str, ESExpr<'static>> {
560	terminated(terminated(expr, skip_ws), eof).parse(input)
561}
562
563pub(crate) fn multi_expr_file(input: &str) -> IResult<&str, Vec<ESExpr<'static>>> {
564	terminated(terminated(many0(expr), skip_ws), eof).parse(input)
565}