1use nom::{
2 IResult, Parser,
3 branch::alt,
4 bytes::complete::{tag, take_while1},
5 character::complete::{char, multispace0, multispace1},
6 combinator::{opt, recognize, value},
7 error::ErrorKind,
8 multi::separated_list0,
9 sequence::{pair, preceded, terminated},
10};
11
12use crate::Error;
13use crate::MAX_PARSE_DEPTH;
14use crate::ast::{NumberType, SYMBOL_SPECIAL_CHARS, Value, is_valid_symbol};
15use crate::builtinops::{find_scheme_op, get_quote_op};
16
17fn create_quote_precompiled_op(content: &Value) -> Value {
19 let builtin_op = get_quote_op();
20 Value::PrecompiledOp {
21 op: builtin_op,
22 op_id: builtin_op.scheme_id.into(),
23 args: vec![content.clone()],
24 }
25}
26
27#[derive(Debug, Clone, Copy, PartialEq)]
29enum ShouldPrecompileOps {
30 Yes,
31 No,
32}
33
34fn parse_error_to_message(input: &str, error: nom::Err<nom::error::Error<&str>>) -> String {
36 match error {
37 nom::Err::Error(e) | nom::Err::Failure(e) => {
38 let position = input.len().saturating_sub(e.input.len());
39 match e.code {
40 ErrorKind::Char => format!("Expected character at position {position}"),
41 ErrorKind::Tag => format!("Unexpected token at position {position}"),
42 ErrorKind::TooLarge => {
43 format!("Expression too deeply nested (max depth: {MAX_PARSE_DEPTH})")
44 }
45 _ => {
46 if position < input.len() {
47 let remaining_chars: String =
48 input.chars().skip(position).take(10).collect();
49 format!("Invalid syntax near '{remaining_chars}'")
50 } else {
51 "Unexpected end of input".into()
52 }
53 }
54 }
55 }
56 nom::Err::Incomplete(_) => "Incomplete input".into(),
57 }
58}
59
60fn parse_number(input: &str) -> IResult<&str, Value> {
62 alt((parse_hexadecimal, parse_decimal)).parse(input)
63}
64
65fn parse_decimal(input: &str) -> IResult<&str, Value> {
67 let (input, number_str) = recognize(pair(
68 opt(char('-')),
69 take_while1(|c: char| c.is_ascii_digit()),
70 ))
71 .parse(input)?;
72
73 match number_str.parse::<NumberType>() {
74 Ok(n) => Ok((input, Value::Number(n))),
75 Err(_) => {
76 Err(nom::Err::Error(nom::error::Error::new(
79 input,
80 nom::error::ErrorKind::Digit,
81 )))
82 }
83 }
84}
85
86fn parse_hexadecimal(input: &str) -> IResult<&str, Value> {
88 let (input, _) = char('#').parse(input)?;
89 let (input, _) = alt((char('x'), char('X'))).parse(input)?;
90 let (input, hex_digits) = take_while1(|c: char| c.is_ascii_hexdigit()).parse(input)?;
91
92 match NumberType::from_str_radix(hex_digits, 16) {
93 Ok(n) => Ok((input, Value::Number(n))),
94 Err(_) => Err(nom::Err::Error(nom::error::Error::new(
95 input,
96 nom::error::ErrorKind::HexDigit,
97 ))),
98 }
99}
100
101fn parse_bool(input: &str) -> IResult<&str, Value> {
103 alt((
104 value(Value::Bool(true), tag("#t")),
105 value(Value::Bool(false), tag("#f")),
106 ))
107 .parse(input)
108}
109
110fn parse_symbol(input: &str) -> IResult<&str, Value> {
112 let mut symbol_chars =
113 take_while1(|c: char| c.is_alphanumeric() || SYMBOL_SPECIAL_CHARS.contains(c));
114
115 let (remaining, candidate) = symbol_chars.parse(input)?;
116
117 if is_valid_symbol(candidate) {
118 Ok((remaining, Value::Symbol(candidate.into())))
119 } else {
120 Err(nom::Err::Error(nom::error::Error::new(
121 input,
122 nom::error::ErrorKind::Alpha,
123 )))
124 }
125}
126
127fn parse_string(input: &str) -> IResult<&str, Value> {
129 let (mut remaining, _) = char('"').parse(input)?;
130 let mut chars = Vec::new();
131
132 loop {
133 let mut char_iter = remaining.chars();
134 match char_iter.next() {
135 Some('"') => {
136 return Ok((
138 char_iter.as_str(),
139 Value::String(chars.into_iter().collect()),
140 ));
141 }
142 Some('\\') => {
143 match char_iter.next() {
145 Some('n') => chars.push('\n'),
146 Some('t') => chars.push('\t'),
147 Some('r') => chars.push('\r'),
148 Some('\\') => chars.push('\\'),
149 Some('"') => chars.push('"'),
150 Some(_) => {
151 return Err(nom::Err::Error(nom::error::Error::new(
153 remaining,
154 nom::error::ErrorKind::Char,
155 )));
156 }
157 None => {
158 return Err(nom::Err::Error(nom::error::Error::new(
160 remaining,
161 nom::error::ErrorKind::Char,
162 )));
163 }
164 }
165 remaining = char_iter.as_str();
167 }
168 Some(ch) => {
169 chars.push(ch);
171 remaining = char_iter.as_str();
172 }
173 None => {
174 return Err(nom::Err::Error(nom::error::Error::new(
176 remaining,
177 nom::error::ErrorKind::Char,
178 )));
179 }
180 }
181 }
182}
183
184fn parse_list(
186 input: &str,
187 should_precompile: ShouldPrecompileOps,
188 depth: usize,
189) -> IResult<&str, Value> {
190 let (input, _) = char('(').parse(input)?;
192 let (input, _) = multispace0.parse(input)?;
193
194 let (input, is_quote) = opt(tag("quote")).parse(input)?;
196
197 if is_quote.is_some() {
198 let (input, _) = multispace1.parse(input)?;
200 let (input, content) = parse_sexpr(input, ShouldPrecompileOps::No, depth + 1)?;
201 let (input, _) = multispace0.parse(input)?;
202 let (input, _) = char(')').parse(input)?;
203
204 if should_precompile == ShouldPrecompileOps::Yes {
206 let precompiled = create_quote_precompiled_op(&content);
207 return Ok((input, precompiled));
208 }
209
210 return Ok((
212 input,
213 Value::List(vec![Value::Symbol("quote".into()), content]),
214 ));
215 }
216
217 let (input, elements) = separated_list0(multispace1, |input| {
219 parse_sexpr(input, should_precompile, depth + 1)
220 })
221 .parse(input)?;
222
223 let (input, _) = multispace0.parse(input)?;
225 let (input, _) = char(')').parse(input)?;
226
227 if should_precompile == ShouldPrecompileOps::Yes
229 && let [Value::Symbol(op_name), args @ ..] = elements.as_slice()
230 && let Some(builtin_op) = find_scheme_op(op_name.as_str())
231 {
232 return Ok((
233 input,
234 Value::PrecompiledOp {
235 op: builtin_op,
236 op_id: builtin_op.scheme_id.into(),
237 args: args.to_vec(),
238 },
239 ));
240 }
241
242 Ok((input, Value::List(elements)))
243}
244
245fn parse_sexpr(
247 input: &str,
248 should_precompile: ShouldPrecompileOps,
249 depth: usize,
250) -> IResult<&str, Value> {
251 if depth >= MAX_PARSE_DEPTH {
252 return Err(nom::Err::Error(nom::error::Error::new(
253 input,
254 nom::error::ErrorKind::TooLarge,
255 )));
256 }
257 preceded(
258 multispace0,
259 alt((
260 |input| parse_quote(input, should_precompile, depth), |input| parse_list(input, should_precompile, depth),
262 parse_number,
263 parse_bool,
264 parse_string,
265 parse_symbol,
266 )),
267 )
268 .parse(input)
269}
270
271fn parse_quote(
273 input: &str,
274 should_precompile: ShouldPrecompileOps,
275 depth: usize,
276) -> IResult<&str, Value> {
277 let (input, _) = char('\'').parse(input)?;
278 let (input, expr) = parse_sexpr(input, ShouldPrecompileOps::No, depth + 1)?; if should_precompile == ShouldPrecompileOps::Yes {
282 let precompiled = create_quote_precompiled_op(&expr);
283 return Ok((input, precompiled));
284 }
285
286 Ok((
288 input,
289 Value::List(vec![Value::Symbol("quote".into()), expr]),
290 ))
291}
292
293pub fn parse_scheme(input: &str) -> Result<Value, Error> {
295 match terminated(
296 |input| parse_sexpr(input, ShouldPrecompileOps::Yes, 0),
297 multispace0,
298 )
299 .parse(input)
300 {
301 Ok(("", value)) => {
302 validate_arity_in_ast(&value)?;
304 Ok(value)
305 }
306 Ok((remaining, _)) => Err(Error::ParseError(format!(
307 "Unexpected remaining input: '{remaining}'"
308 ))),
309 Err(e) => Err(Error::ParseError(parse_error_to_message(input, e))),
310 }
311}
312
313fn validate_arity_in_ast(value: &Value) -> Result<(), Error> {
315 match value {
316 Value::PrecompiledOp { op, args, .. } => {
317 if let Err(Error::ArityError { expected, got, .. }) = op.validate_arity(args.len()) {
319 return Err(Error::arity_error_with_expr(
320 expected,
321 got,
322 format!("{}", value.to_uncompiled_form()),
323 ));
324 }
325 for arg in args {
327 validate_arity_in_ast(arg)?;
328 }
329 }
330 Value::List(elements) => {
331 for element in elements {
333 validate_arity_in_ast(element)?;
334 }
335 }
336 _ => {} }
338 Ok(())
339}
340
341#[cfg(test)]
342#[expect(clippy::unwrap_used)] mod tests {
344 use super::*;
345 use crate::ast::{nil, sym, val};
346
347 #[derive(Debug)]
349 enum ParseTestResult {
350 Success(Value), SuccessPrecompiledOp(&'static str, Vec<Value>), SemanticallyEquivalent(Value), SpecificError(&'static str), Error, }
356 use ParseTestResult::*;
357
358 fn success<T: Into<Value>>(value: T) -> ParseTestResult {
360 Success(value.into())
361 }
362
363 fn precompiled_op(scheme_id: &'static str, args: Vec<Value>) -> ParseTestResult {
365 SuccessPrecompiledOp(scheme_id, args)
366 }
367
368 fn semantically_equivalent<T: Into<Value>>(value: T) -> ParseTestResult {
370 SemanticallyEquivalent(value.into())
371 }
372
373 fn run_parse_tests(test_cases: Vec<(&str, ParseTestResult)>) {
375 for (i, (input, expected)) in test_cases.iter().enumerate() {
376 let test_id = format!("Parse test #{}", i + 1);
377 let result = parse_scheme(input);
378
379 match (result, expected) {
380 (Ok(actual), Success(expected_val)) => {
382 assert_eq!(actual, *expected_val, "{test_id}: value mismatch");
383
384 let displayed = format!("{actual}");
386 let reparsed = parse_scheme(&displayed).unwrap_or_else(|e| {
387 panic!("{test_id}: round-trip parse failed for '{displayed}': {e:?}")
388 });
389 let redisplayed = format!("{reparsed}");
390 assert_eq!(
391 displayed, redisplayed,
392 "{test_id}: round-trip display mismatch for '{input}'"
393 );
394 }
395
396 (Ok(actual), SuccessPrecompiledOp(expected_scheme_id, expected_args)) => {
397 if let Value::PrecompiledOp { op_id, args, .. } = &actual {
398 assert_eq!(op_id, expected_scheme_id, "{test_id}: scheme_id mismatch");
399 assert_eq!(args, expected_args, "{test_id}: args mismatch");
400
401 let displayed = format!("{actual}");
403 let reparsed = parse_scheme(&displayed).unwrap_or_else(|e| {
404 panic!("{test_id}: round-trip parse failed for '{displayed}': {e:?}")
405 });
406 let redisplayed = format!("{reparsed}");
407 assert_eq!(
408 displayed, redisplayed,
409 "{test_id}: round-trip display mismatch for '{input}'"
410 );
411 } else {
412 panic!("{test_id}: expected PrecompiledOp, got {actual:?}");
413 }
414 }
415
416 (Ok(actual), SemanticallyEquivalent(expected_val)) => {
417 let actual_uncompiled = actual.to_uncompiled_form();
420 assert_eq!(
421 actual_uncompiled, *expected_val,
422 "{test_id}: semantic equivalence mismatch"
423 );
424
425 let displayed = format!("{actual}");
427 let reparsed = parse_scheme(&displayed).unwrap_or_else(|e| {
428 panic!("{test_id}: round-trip parse failed for '{displayed}': {e:?}")
429 });
430 let redisplayed = format!("{reparsed}");
431 assert_eq!(
432 displayed, redisplayed,
433 "{test_id}: round-trip display mismatch for '{input}'"
434 );
435 }
436
437 (Err(_), Error) => {} (Err(err), SpecificError(expected_text)) => {
440 let error_msg = format!("{err:?}");
441 assert!(
442 error_msg.contains(expected_text),
443 "{test_id}: error should contain '{expected_text}'"
444 );
445 }
446
447 (Ok(actual), Error) => {
449 panic!("{test_id}: expected error, got {actual:?}");
450 }
451 (Ok(actual), SpecificError(expected_text)) => {
452 panic!(
453 "{test_id}: expected error containing '{expected_text}', got {actual:?}"
454 );
455 }
456 (Err(err), Success(_)) => {
457 panic!("{test_id}: expected success, got error {err:?}");
458 }
459 (Err(err), SuccessPrecompiledOp(_, _)) => {
460 panic!("{test_id}: expected PrecompiledOp, got error {err:?}");
461 }
462 (Err(err), SemanticallyEquivalent(_)) => {
463 panic!("{test_id}: expected SemanticallyEquivalent, got error {err:?}");
464 }
465 }
466 }
467 }
468
469 #[test]
470 #[expect(clippy::too_many_lines)] fn test_parser_comprehensive() {
472 use crate::builtinops::find_scheme_op;
473
474 let test_cases = vec![
475 ("42", success(42)),
478 ("-5", success(-5)),
479 ("0", success(0)),
480 ("-0", success(0)),
481 ("#x1A", success(26)),
483 ("#X1a", success(26)), ("#xff", success(255)),
485 ("#x0", success(0)),
486 ("#x12345", success(74565)),
487 ("9223372036854775807", success(i64::MAX)),
489 ("-9223372036854775808", success(i64::MIN)),
490 ("3.14", Error), ("#xG", Error), ("#x", Error), ("#y123", Error), ("123abc", Error), ("99999999999999999999", Error), ("-99999999999999999999", Error), ("foo", success(sym("foo"))),
501 ("+", success(sym("+"))),
502 (">=", success(sym(">="))),
503 ("test-name", success(sym("test-name"))),
505 ("test*name", success(sym("test*name"))),
506 ("test/name", success(sym("test/name"))),
507 ("test<name", success(sym("test<name"))),
508 ("test=name", success(sym("test=name"))),
509 ("test>name", success(sym("test>name"))),
510 ("test!name", success(sym("test!name"))),
511 ("test?name", success(sym("test?name"))),
512 ("test_name", success(sym("test_name"))),
513 ("test$name", success(sym("test$name"))),
514 ("var123", success(sym("var123"))),
516 ("-", success(sym("-"))),
517 ("-abc", success(sym("-abc"))),
518 ("123var", Error),
520 ("-42name", Error),
521 ("test space", Error),
522 ("test@home", Error),
523 ("test#tag", Error),
524 ("test%percent", Error),
525 ("#t", success(true)),
528 ("#f", success(false)),
529 ("#T", Error),
531 ("#F", Error),
532 ("#true", Error),
533 ("#false", Error),
534 ("\"hello\"", success("hello")),
537 ("\"hello world\"", success("hello world")),
538 (r#""hello\nworld""#, success("hello\nworld")),
540 (r#""tab\there""#, success("tab\there")),
541 (r#""carriage\rreturn""#, success("carriage\rreturn")),
542 (r#""quote\"test""#, success("quote\"test")),
543 (r#""backslash\\test""#, success("backslash\\test")),
544 (r#""other\xchar""#, Error), (r#""test\zchar""#, Error), ("\"\"", success("")),
549 (r#""unterminated"#, Error),
551 (r#""unterminated\"#, Error), (r#""test\""#, Error), ("()", success(nil())),
555 ("(42)", success([42])),
558 (
560 "(1 hello \"world\" #t)",
561 success([val(1), sym("hello"), val("world"), val(true)]),
562 ),
563 ("(1 2 3)", success([1, 2, 3])),
565 ("(+ 1 2)", precompiled_op("+", vec![val(1), val(2)])),
567 (
568 "(* 3 4 5)",
569 precompiled_op("*", vec![val(3), val(4), val(5)]),
570 ),
571 ("(< 1 2)", precompiled_op("<", vec![val(1), val(2)])),
572 (
573 "(if #t 1 2)",
574 precompiled_op("if", vec![val(true), val(1), val(2)]),
575 ),
576 ("(foo 1 2)", success([sym("foo"), val(1), val(2)])),
578 ("(a b c)", success([sym("a"), sym("b"), sym("c")])),
580 (
582 "(42 is the answer)",
583 success([val(42), sym("is"), sym("the"), sym("answer")]),
584 ),
585 (
588 "'foo",
589 semantically_equivalent(val([sym("quote"), sym("foo")])),
590 ),
591 (
592 "'(1 2 3)",
593 semantically_equivalent(val([sym("quote"), val([1, 2, 3])])),
594 ),
595 ("'()", semantically_equivalent(val([sym("quote"), nil()]))),
596 ("(quote foo)", precompiled_op("quote", vec![sym("foo")])),
598 (
599 "(quote (1 2 3))",
600 precompiled_op("quote", vec![val([1, 2, 3])]),
601 ),
602 ("((1 2) (3 4))", success([[1, 2], [3, 4]])),
604 (
606 "((+ 1 2) (foo bar))",
607 success([
608 val(Value::PrecompiledOp {
609 op: find_scheme_op("+").unwrap(),
610 op_id: "+".into(),
611 args: vec![val(1), val(2)],
612 }),
613 val([sym("foo"), sym("bar")]),
614 ]),
615 ),
616 (
618 "(car (list 1 2 3))",
619 precompiled_op(
620 "car",
621 vec![val(Value::PrecompiledOp {
622 op: find_scheme_op("list").unwrap(),
623 op_id: "list".into(),
624 args: vec![val(1), val(2), val(3)],
625 })],
626 ),
627 ),
628 (" 42 ", success(42)),
631 ("\t#t\n", success(true)),
632 ("\r\n foo \t", success(sym("foo"))),
633 ("( 1 2\t\n3 )", success([1, 2, 3])),
635 ("( )", success(nil())),
637 ("(\t\n)", success(nil())),
638 ("(((1)))", success([val([val([val(1)])])])),
641 (
643 "(foo (\"bar\" #t) -123)",
644 success([sym("foo"), val([val("bar"), val(true)]), val(-123)]),
645 ),
646 ("(1 2 3", Error), ("1 2 3)", Error), ("((1 2)", Error), ("", Error),
653 (" ", Error), (")", Error),
656 ("@invalid", Error),
657 ("1 2", Error),
659 ("(+ 1 2) (+ 3 4)", Error),
660 ("(if #t 1)", SpecificError("ArityError")), ("(if #t 42 0 extra)", SpecificError("ArityError")), ("(if)", SpecificError("ArityError")), ("(and)", SpecificError("ArityError")), ("(or)", SpecificError("ArityError")), ("(not)", SpecificError("ArityError")), ("(not #t #f)", SpecificError("ArityError")), ("(car)", SpecificError("ArityError")), ("(car (list 1 2) extra)", SpecificError("ArityError")), ("(cdr)", SpecificError("ArityError")), ("(cdr (list 1 2) extra)", SpecificError("ArityError")), (
684 "(if #t 1 2)",
685 precompiled_op("if", vec![val(true), val(1), val(2)]),
686 ),
687 (
688 "(and #t #f)",
689 precompiled_op("and", vec![val(true), val(false)]),
690 ),
691 (
692 "(or #f #t)",
693 precompiled_op("or", vec![val(false), val(true)]),
694 ),
695 ("(not #f)", precompiled_op("not", vec![val(false)])),
696 ("(list (not) 42)", SpecificError("ArityError")),
698 ("(+ 1 (- 2", SpecificError("ParseError")),
701 ("1 2 3)", SpecificError("ParseError")),
703 ("(1 2))", SpecificError("ParseError")),
704 (")", SpecificError("ParseError")),
706 ("@invalid", SpecificError("ParseError")),
710 ("", SpecificError("ParseError")),
712 (" ", SpecificError("ParseError")),
713 ("\t\n", SpecificError("ParseError")),
714 ("1 2", SpecificError("ParseError")),
716 ("(+ 1 2) (+ 3 4)", SpecificError("ParseError")),
717 ("42 #t", SpecificError("ParseError")),
718 ("symbol", success(sym("symbol"))), ];
721
722 run_parse_tests(test_cases);
723 }
724
725 #[test]
726 fn test_parser_depth_limits() {
727 let parens_under_limit = format!(
729 "{}unbound{}",
730 "(".repeat(MAX_PARSE_DEPTH - 1),
731 ")".repeat(MAX_PARSE_DEPTH - 1)
732 );
733 let quotes_under_limit = format!("{}unbound", "'".repeat(MAX_PARSE_DEPTH - 1));
734 let deep_parens_at_limit = format!(
735 "{}1{}",
736 "(".repeat(MAX_PARSE_DEPTH),
737 ")".repeat(MAX_PARSE_DEPTH)
738 );
739 let deep_quotes_at_limit = format!("{}a", "'".repeat(MAX_PARSE_DEPTH));
740
741 let depth_test_cases = vec![
742 (
744 deep_parens_at_limit.as_str(),
745 SpecificError("Invalid syntax"),
746 ),
747 (
748 deep_quotes_at_limit.as_str(),
749 SpecificError("Invalid syntax"),
750 ),
751 ];
752
753 run_parse_tests(depth_test_cases);
754
755 assert!(
757 parse_scheme(&parens_under_limit).is_ok(),
758 "Parens just under depth limit should parse successfully"
759 );
760 assert!(
761 parse_scheme("es_under_limit).is_ok(),
762 "Quotes just under depth limit should parse successfully"
763 );
764 }
765}