Skip to main content

esexpr_text/
parser.rs

1use alloc::borrow::Cow;
2use alloc::collections::BTreeMap;
3use alloc::string::String;
4use alloc::vec::Vec;
5use core::str::FromStr;
6
7use esexpr::ESExpr;
8use esexpr::cowstr::CowStr;
9use half::f16;
10use nom::branch::alt;
11use nom::bytes::complete::{escaped_transform, tag, tag_no_case, take_until, take_while, take_while_m_n, take_while1};
12use nom::character::complete::{alphanumeric1, bin_digit1, char, digit1, hex_digit1, multispace1, none_of, oct_digit1, one_of};
13use nom::combinator::{cut, eof, map, map_res, not, opt, peek, recognize, value};
14use nom::multi::{many0, many0_count};
15use nom::sequence::{delimited, pair, preceded, separated_pair, terminated};
16use nom::{IResult, Parser};
17use num_bigint::{BigInt, BigUint, Sign};
18
/// Represents a lexer error.
///
/// Each variant names one way in which the textual input can be malformed. The type is
/// cheap to clone and supports full equality comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexErrorType {
	/// Unexpected token.
	UnexpectedToken,

	/// Unterminated string.
	UnterminatedString,

	/// Unterminated identifier string.
	UnterminatedIdentifierString,

	/// Invalid unicode codepoint; carries the rejected numeric value.
	InvalidUnicodeCodePoint(u32),

	/// Invalid NaN payload bits; carries the rejected payload widened to 64 bits.
	InvalidNaNPayload(u64),
}
37
38/// Parser that skips whitespace and comments.
39///
40/// # Errors
41/// Returns `Err` when parsing fails.
42pub fn skip_ws(input: &str) -> IResult<&str, ()> {
43	value((), many0_count(alt((value((), multispace1), comment)))).parse(input)
44}
45
46fn comment(input: &str) -> IResult<&str, ()> {
47	value((), pair(tag("//"), take_until("\n"))).parse(input)
48}
49
/// Reports whether `c` is an ASCII lowercase letter (`a` through `z`).
fn is_alpha(c: char) -> bool {
	matches!(c, 'a'..='z')
}
53
/// Reports whether `c` is an ASCII lowercase letter or an ASCII decimal digit.
fn is_alphanum(c: char) -> bool {
	matches!(c, 'a'..='z' | '0'..='9')
}
57
58/// Parser for a simple identifier.
59///
60/// # Errors
61/// Returns `Err` when parsing fails.
62pub fn simple_identifier(input: &str) -> IResult<&str, &str> {
63	preceded(
64		skip_ws,
65		recognize((
66			take_while1(is_alpha),
67			take_while(is_alphanum),
68			many0(pair(char('-'), take_while1(is_alphanum))),
69		)),
70	)
71	.parse(input)
72}
73
74fn identifier(input: &str) -> IResult<&str, String> {
75	alt((
76		map(simple_identifier, String::from),
77		preceded(skip_ws, string_impl('\'', "'\\")),
78	))
79	.parse(input)
80}
81
82fn float_decimal(input: &str) -> IResult<&str, ESExpr<'static>> {
83	map(
84		recognize((
85			opt(one_of("+-")),
86			digit1,
87			char('.'),
88			cut(digit1),
89			opt((one_of("eE"), opt(one_of("+-")), digit1)),
90			opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
91			not(peek(alphanumeric1)),
92		)),
93		parse_dec_float,
94	)
95	.parse(input)
96}
97
98fn parse_dec_float(s: &str) -> ESExpr<'static> {
99	if s.ends_with("f16") || s.ends_with("F16") {
100		#[expect(
101			clippy::unwrap_used,
102			reason = "Shouldn't fail because the parser should ensure the format is valid."
103		)]
104		let f = s.trim_end_matches("f16")
105			.trim_end_matches("F16")
106			.parse::<f16>()
107			.unwrap();
108		ESExpr::Float16(f)
109	}
110	else if s.ends_with('f') || s.ends_with('F') {
111		#[expect(
112			clippy::unwrap_used,
113			reason = "Shouldn't fail because the parser should ensure the format is valid."
114		)]
115		let f = s.trim_end_matches('f').trim_end_matches('F').parse::<f32>().unwrap();
116		ESExpr::Float32(f)
117	}
118	else {
119		#[expect(
120			clippy::unwrap_used,
121			reason = "Shouldn't fail because the parser should ensure the format is valid."
122		)]
123		let d = s.trim_end_matches('f').trim_end_matches('F').parse::<f64>().unwrap();
124		ESExpr::Float64(d)
125	}
126}
127
128fn float_hex(input: &str) -> IResult<&str, ESExpr<'static>> {
129	map(
130		recognize((
131			opt(one_of("+-")),
132			tag_no_case("0x"),
133			hex_digit1,
134			char('.'),
135			hex_digit1,
136			cut(one_of("pP")),
137			opt(one_of("+-")),
138			digit1,
139			opt(alt((tag("f16"), tag("F16"), tag("f"), tag("F"), tag("d"), tag("D")))),
140			not(peek(alphanumeric1)),
141		)),
142		parse_hex_float,
143	)
144	.parse(input)
145}
146
147fn parse_hex_float(s: &str) -> ESExpr<'static> {
148	if s.ends_with("f16") || s.ends_with("F16") {
149		#[expect(
150			clippy::unwrap_used,
151			reason = "Shouldn't fail because the parser should ensure the format is valid."
152		)]
153		let repr: hexponent::FloatLiteral = s
154			.trim_end_matches("f16")
155			.trim_end_matches("f16")
156			.parse::<hexponent::FloatLiteral>()
157			.unwrap();
158		let f = repr.convert().inner();
159
160		ESExpr::Float16(f16::from_f32(f))
161	}
162	else if s.ends_with('f') || s.ends_with('F') {
163		#[expect(
164			clippy::unwrap_used,
165			reason = "Shouldn't fail because the parser should ensure the format is valid."
166		)]
167		let repr: hexponent::FloatLiteral = s
168			.trim_end_matches('f')
169			.trim_end_matches('F')
170			.parse::<hexponent::FloatLiteral>()
171			.unwrap();
172		let f = repr.convert().inner();
173
174		ESExpr::Float32(f)
175	}
176	else {
177		#[expect(
178			clippy::unwrap_used,
179			reason = "Shouldn't fail because the parser should ensure the format is valid."
180		)]
181		let repr: hexponent::FloatLiteral = s
182			.trim_end_matches('d')
183			.trim_end_matches('D')
184			.parse::<hexponent::FloatLiteral>()
185			.unwrap();
186		let d = repr.convert().inner();
187		ESExpr::Float64(d)
188	}
189}
190
191fn float16_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
192	map_res(
193		(
194			tag("#float16:"),
195			opt(one_of("+-")),
196			tag("nan"),
197			opt(preceded(
198				tag(":"),
199				nom::character::complete::u16
200			))
201		),
202		|(_, sign, _, payload)| {
203			let is_neg = sign.is_some_and(|sign| sign == '-');
204
205			let Some(payload) = payload else {
206				if is_neg {
207					return Ok(ESExpr::Float16(-f16::NAN));
208				}
209				else {
210					return Ok(ESExpr::Float16(f16::NAN));
211				}
212			};
213
214			if (payload & 0xFC00) != 0 {
215				return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
216			}
217
218			let sign_bit: u16 = if is_neg { 0x8000 } else { 0 };
219			let exponent: u16 = 0x7C00;
220
221			let f = f16::from_bits(sign_bit | exponent | payload);
222
223			Ok(ESExpr::Float16(f))
224		}
225	).parse(input)
226}
227
228fn float32_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
229	map_res(
230		(
231			tag("#float32:"),
232			opt(one_of("+-")),
233			tag("nan"),
234			opt(preceded(
235				tag(":"),
236				nom::character::complete::u32
237			))
238		),
239		|(_, sign, _, payload)| {
240			let is_neg = sign.is_some_and(|sign| sign == '-');
241
242			let Some(payload) = payload else {
243				if is_neg {
244					return Ok(ESExpr::Float32(-f32::NAN));
245				}
246				else {
247					return Ok(ESExpr::Float32(f32::NAN));
248				}
249			};
250
251			if (payload & 0xFF800000) != 0 {
252				return Err(LexErrorType::InvalidNaNPayload(u64::from(payload)))
253			}
254
255			let sign_bit: u32 = if is_neg { 0x80000000 } else { 0 };
256			let exponent: u32 = 0x7F800000;
257
258			let f = f32::from_bits(sign_bit | exponent | payload);
259
260			Ok(ESExpr::Float32(f))
261		}
262	).parse(input)
263}
264
265fn float64_nan(input: &str) -> IResult<&str, ESExpr<'static>> {
266	map_res(
267		(
268			tag("#float64:"),
269			opt(one_of("+-")),
270			tag("nan"),
271			opt(preceded(
272				tag(":"),
273				nom::character::complete::u64
274			))
275		),
276		|(_, sign, _, payload)| {
277			let is_neg = sign.is_some_and(|sign| sign == '-');
278
279			let Some(payload) = payload else {
280				if is_neg {
281					return Ok(ESExpr::Float64(-f64::NAN));
282				}
283				else {
284					return Ok(ESExpr::Float64(f64::NAN));
285				}
286			};
287
288			if (payload & 0xFFF0000000000000) != 0 {
289				return Err(LexErrorType::InvalidNaNPayload(payload));
290			}
291
292			let sign_bit: u64 = if is_neg { 0x8000000000000000 } else { 0 };
293			let exponent: u64 = 0x7FF0000000000000;
294
295			let f = f64::from_bits(sign_bit | exponent | payload);
296
297			Ok(ESExpr::Float64(f))
298		}
299	).parse(input)
300}
301
302fn float<'a>(input: &'a str) -> IResult<&'a str, ESExpr<'static>> {
303	preceded(
304		skip_ws,
305		alt((
306			float_decimal,
307			float_hex,
308			float16_nan,
309			atom(ESExpr::Float16(f16::INFINITY), "#float16:+inf"),
310			atom(ESExpr::Float16(f16::NEG_INFINITY), "#float16:-inf"),
311			float32_nan,
312			atom(ESExpr::Float32(f32::INFINITY), "#float32:+inf"),
313			atom(ESExpr::Float32(f32::NEG_INFINITY), "#float32:-inf"),
314			float64_nan,
315			atom(ESExpr::Float64(f64::INFINITY), "#float64:+inf"),
316			atom(ESExpr::Float64(f64::NEG_INFINITY), "#float64:-inf"),
317		)),
318	)
319	.parse(input)
320}
321
322/// Parses an input string and extracts an integer of arbitrary size (`BigInt`).
323pub fn integer(input: &str) -> IResult<&str, BigInt> {
324	preceded(
325		skip_ws,
326		map((
327			opt(one_of("+-")),
328			unsigned_integer
329		), |(sign, n)| {
330			let sign = if sign.is_some_and(|s| s == '-') { Sign::Minus } else { Sign::Plus };
331			BigInt::from_biguint(sign, n)
332		}),
333	)
334	.parse(input)
335}
336
337/// Parses an input string and extracts an unsigned integer of arbitrary size (`BigUint`).
338pub fn unsigned_integer(input: &str) -> IResult<&str, BigUint> {
339	preceded(
340		skip_ws,
341		alt((
342			map(
343				preceded(tag_no_case("0x"), hex_digit1),
344				|s: &str| parse_int_base(s, 16),
345			),
346			map(
347				preceded(tag_no_case("0b"), bin_digit1),
348				|s: &str| parse_int_base(s, 2),
349			),
350			map(
351				preceded(tag("0o"), oct_digit1),
352				|s: &str| parse_int_base(s, 8),
353			),
354			map(recognize(digit1), |s: &str| {
355				#[expect(
356					clippy::unwrap_used,
357					reason = "Shouldn't fail because the parser should ensure the format is valid."
358				)]
359				s.parse::<BigUint>().unwrap()
360			}),
361		)),
362	)
363		.parse(input)
364}
365
366fn parse_int_base(s: &str, radix: u32) -> BigUint {
367	let b: Vec<u8> = s
368		.chars()
369		.map(|c| {
370			#[expect(
371				clippy::unwrap_used,
372				reason = "Shouldn't fail because the parser should ensure the format is valid."
373			)]
374			#[expect(
375				clippy::cast_possible_truncation,
376				reason = "Shouldn't be out of range because it is a single digit"
377			)]
378			{
379				c.to_digit(radix).unwrap() as u8
380			}
381		})
382		.collect();
383
384	#[expect(
385		clippy::unwrap_used,
386		reason = "Shouldn't fail because the parser should ensure the format is valid."
387	)]
388	BigUint::from_radix_be(&b, radix).unwrap()
389}
390
391fn string(input: &str) -> IResult<&str, String> {
392	preceded(skip_ws, string_impl('"', "\"\\")).parse(input)
393}
394
395fn string_impl<'a>(
396	quote: char,
397	non_normal_chars: &'static str,
398) -> impl Parser<&'a str, Output = String, Error = nom::error::Error<&'a str>> {
399	move |input| {
400		delimited(
401			char(quote),
402			escaped_transform(
403				none_of(non_normal_chars),
404				'\\',
405				alt((
406					value('\x0C', char('f')),
407					value('\n', char('n')),
408					value('\r', char('r')),
409					value('\t', char('t')),
410					value('\\', char('\\')),
411					value('"', char('"')),
412					value('\'', char('\'')),
413					delimited(
414						tag("u{"),
415						map_res(hex_digit1, |codepoint| -> Result<core::primitive::char, LexErrorType> {
416							#[expect(
417								clippy::unwrap_used,
418								reason = "Shouldn't fail because the parser should ensure the format is valid."
419							)]
420							let codepoint = u32::from_str_radix(codepoint, 16).unwrap();
421							char::from_u32(codepoint).ok_or(LexErrorType::InvalidUnicodeCodePoint(codepoint))
422						}),
423						char('}'),
424					),
425				)),
426			),
427			char(quote),
428		)
429		.parse(input)
430	}
431}
432
433fn binary(input: &str) -> IResult<&str, ESExpr<'static>> {
434	alt((
435		map(
436			delimited(preceded(skip_ws, tag("#\"")), many0(hex_byte), cut(tag("\""))),
437			|b| ESExpr::Array8(Cow::Owned(b)),
438		),
439		map(
440			delimited(
441				preceded(skip_ws, tag("#u8[")),
442				many0(map_res(preceded(skip_ws, integer), u8::try_from)),
443				preceded(skip_ws, cut(tag("]"))),
444			),
445			|b| ESExpr::Array8(Cow::Owned(b)),
446		),
447		map(
448			delimited(
449				preceded(skip_ws, tag("#u16[")),
450				many0(map_res(preceded(skip_ws, integer), u16::try_from)),
451				preceded(skip_ws, cut(tag("]"))),
452			),
453			|b| ESExpr::Array16(Cow::Owned(b)),
454		),
455		map(
456			delimited(
457				preceded(skip_ws, tag("#u32[")),
458				many0(map_res(preceded(skip_ws, integer), u32::try_from)),
459				preceded(skip_ws, cut(tag("]"))),
460			),
461			|b| ESExpr::Array32(Cow::Owned(b)),
462		),
463		map(
464			delimited(
465				preceded(skip_ws, tag("#u64[")),
466				many0(map_res(preceded(skip_ws, integer), u64::try_from)),
467				preceded(skip_ws, cut(tag("]"))),
468			),
469			|b| ESExpr::Array64(Cow::Owned(b)),
470		),
471		map(
472			delimited(
473				preceded(skip_ws, tag("#u128[")),
474				many0(map_res(preceded(skip_ws, integer), u128::try_from)),
475				preceded(skip_ws, cut(tag("]"))),
476			),
477			|b| ESExpr::Array128(Cow::Owned(b)),
478		),
479	))
480	.parse(input)
481}
482
483fn hex_byte(input: &str) -> IResult<&str, u8> {
484	map(take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit()), |s| {
485		#[expect(
486			clippy::unwrap_used,
487			reason = "Shouldn't fail because the parser should ensure the format is valid."
488		)]
489		u8::from_str_radix(s, 16).unwrap()
490	})
491	.parse(input)
492}
493
/// A single argument parsed inside a constructor expression, before the arguments are
/// split into positional and keyword collections by `build_constructor`.
enum ConstructorArg {
	/// An argument given by position: just the value.
	Positional(ESExpr<'static>),
	/// An argument given as `name: value`: the keyword name and the value.
	Keyword(String, ESExpr<'static>),
}
498
499fn constructor(input: &str) -> IResult<&str, ESExpr<'static>> {
500	map(
501		delimited(
502			preceded(skip_ws, char('(')),
503			pair(identifier, many0(constructor_arg)),
504			preceded(skip_ws, char(')')),
505		),
506		|(name, args)| build_constructor(name, args),
507	)
508	.parse(input)
509}
510
511fn build_constructor(name: String, ctor_args: Vec<ConstructorArg>) -> ESExpr<'static> {
512	let mut args = Vec::new();
513	let mut kwargs = BTreeMap::new();
514
515	for arg in ctor_args {
516		match arg {
517			ConstructorArg::Positional(value) => args.push(value),
518			ConstructorArg::Keyword(name, value) => {
519				kwargs.insert(CowStr::Owned(name), value);
520			},
521		}
522	}
523
524	ESExpr::constructor(name, args, kwargs)
525}
526
527fn constructor_arg(input: &str) -> IResult<&str, ConstructorArg> {
528	alt((
529		map(
530			separated_pair(preceded(skip_ws, identifier), preceded(skip_ws, char(':')), expr),
531			|(name, value)| ConstructorArg::Keyword(name, value),
532		),
533		map(expr, ConstructorArg::Positional),
534	))
535	.parse(input)
536}
537
538fn null_atom(input: &str) -> IResult<&str, ESExpr<'static>> {
539	map((skip_ws, tag("#null"), digit1, not(alphanumeric1)), |(_, _, n, _)| {
540		#[expect(
541			clippy::unwrap_used,
542			reason = "Shouldn't fail because the parser should ensure the format is valid."
543		)]
544		ESExpr::Null(Cow::Owned(BigUint::from_str(n).unwrap()))
545	})
546	.parse(input)
547}
548
549fn atom<'a>(
550	expr: ESExpr<'static>,
551	s: &'static str,
552) -> impl Parser<&'a str, Output = ESExpr<'static>, Error = nom::error::Error<&'a str>> {
553	move |input| value(expr.clone(), preceded(skip_ws, terminated(tag(s), not(alphanumeric1)))).parse(input)
554}
555
556/// Parser for an `ESExpr` expression.
557///
558/// # Errors
559/// Returns `Err` when parsing fails.
560pub fn expr(input: &str) -> IResult<&str, ESExpr<'static>> {
561	alt((
562		float,
563		map(integer, |i| ESExpr::Int(Cow::Owned(i))),
564		map(string, |s| ESExpr::Str(CowStr::Owned(s))),
565		binary,
566		atom(ESExpr::Bool(true), "#true"),
567		atom(ESExpr::Bool(false), "#false"),
568		null_atom,
569		atom(ESExpr::Null(Cow::Owned(BigUint::ZERO)), "#null"),
570		constructor,
571	))
572	.parse(input)
573}
574
575pub(crate) fn expr_file(input: &str) -> IResult<&str, ESExpr<'static>> {
576	terminated(terminated(expr, skip_ws), eof).parse(input)
577}
578
579pub(crate) fn multi_expr_file(input: &str) -> IResult<&str, Vec<ESExpr<'static>>> {
580	terminated(terminated(many0(expr), skip_ws), eof).parse(input)
581}