rulesxp/
scheme.rs

1use nom::{
2    IResult, Parser,
3    branch::alt,
4    bytes::complete::{tag, take_while1},
5    character::complete::{char, multispace0, multispace1},
6    combinator::{opt, recognize, value},
7    error::ErrorKind,
8    multi::separated_list0,
9    sequence::{pair, preceded, terminated},
10};
11
12use crate::Error;
13use crate::MAX_PARSE_DEPTH;
14use crate::ast::{NumberType, SYMBOL_SPECIAL_CHARS, Value, is_valid_symbol};
15use crate::builtinops::{find_scheme_op, get_quote_op};
16
17/// Helper function to create a quote PrecompiledOp
18fn create_quote_precompiled_op(content: &Value) -> Value {
19    let builtin_op = get_quote_op();
20    Value::PrecompiledOp {
21        op: builtin_op,
22        op_id: builtin_op.scheme_id.into(),
23        args: vec![content.clone()],
24    }
25}
26
27/// Control whether builtin operations should be precompiled during parsing
28#[derive(Debug, Clone, Copy, PartialEq)]
29enum ShouldPrecompileOps {
30    Yes,
31    No,
32}
33
34/// Convert nom parsing errors to user-friendly messages
35fn parse_error_to_message(input: &str, error: nom::Err<nom::error::Error<&str>>) -> String {
36    match error {
37        nom::Err::Error(e) | nom::Err::Failure(e) => {
38            let position = input.len().saturating_sub(e.input.len());
39            match e.code {
40                ErrorKind::Char => format!("Expected character at position {position}"),
41                ErrorKind::Tag => format!("Unexpected token at position {position}"),
42                ErrorKind::TooLarge => {
43                    format!("Expression too deeply nested (max depth: {MAX_PARSE_DEPTH})")
44                }
45                _ => {
46                    if position < input.len() {
47                        let remaining_chars: String =
48                            input.chars().skip(position).take(10).collect();
49                        format!("Invalid syntax near '{remaining_chars}'")
50                    } else {
51                        "Unexpected end of input".into()
52                    }
53                }
54            }
55        }
56        nom::Err::Incomplete(_) => "Incomplete input".into(),
57    }
58}
59
60/// Parse a number (integer only, supports decimal and hexadecimal)
61fn parse_number(input: &str) -> IResult<&str, Value> {
62    alt((parse_hexadecimal, parse_decimal)).parse(input)
63}
64
65/// Parse a decimal number
66fn parse_decimal(input: &str) -> IResult<&str, Value> {
67    let (input, number_str) = recognize(pair(
68        opt(char('-')),
69        take_while1(|c: char| c.is_ascii_digit()),
70    ))
71    .parse(input)?;
72
73    match number_str.parse::<NumberType>() {
74        Ok(n) => Ok((input, Value::Number(n))),
75        Err(_) => {
76            // Parse failed - could be due to overflow or invalid format
77            // Symbol parsing will reject this anyway since it starts with digits
78            Err(nom::Err::Error(nom::error::Error::new(
79                input,
80                nom::error::ErrorKind::Digit,
81            )))
82        }
83    }
84}
85
86/// Parse a hexadecimal number (#x or #X prefix)
87fn parse_hexadecimal(input: &str) -> IResult<&str, Value> {
88    let (input, _) = char('#').parse(input)?;
89    let (input, _) = alt((char('x'), char('X'))).parse(input)?;
90    let (input, hex_digits) = take_while1(|c: char| c.is_ascii_hexdigit()).parse(input)?;
91
92    match NumberType::from_str_radix(hex_digits, 16) {
93        Ok(n) => Ok((input, Value::Number(n))),
94        Err(_) => Err(nom::Err::Error(nom::error::Error::new(
95            input,
96            nom::error::ErrorKind::HexDigit,
97        ))),
98    }
99}
100
101/// Parse a boolean (#t or #f)
102fn parse_bool(input: &str) -> IResult<&str, Value> {
103    alt((
104        value(Value::Bool(true), tag("#t")),
105        value(Value::Bool(false), tag("#f")),
106    ))
107    .parse(input)
108}
109
110/// Parse a symbol (identifier)
111fn parse_symbol(input: &str) -> IResult<&str, Value> {
112    let mut symbol_chars =
113        take_while1(|c: char| c.is_alphanumeric() || SYMBOL_SPECIAL_CHARS.contains(c));
114
115    let (remaining, candidate) = symbol_chars.parse(input)?;
116
117    if is_valid_symbol(candidate) {
118        Ok((remaining, Value::Symbol(candidate.into())))
119    } else {
120        Err(nom::Err::Error(nom::error::Error::new(
121            input,
122            nom::error::ErrorKind::Alpha,
123        )))
124    }
125}
126
127/// Parse a string literal
128fn parse_string(input: &str) -> IResult<&str, Value> {
129    let (mut remaining, _) = char('"').parse(input)?;
130    let mut chars = Vec::new();
131
132    loop {
133        let mut char_iter = remaining.chars();
134        match char_iter.next() {
135            Some('"') => {
136                // End of string - remaining is what's left after consuming the quote
137                return Ok((
138                    char_iter.as_str(),
139                    Value::String(chars.into_iter().collect()),
140                ));
141            }
142            Some('\\') => {
143                // Handle escape sequences
144                match char_iter.next() {
145                    Some('n') => chars.push('\n'),
146                    Some('t') => chars.push('\t'),
147                    Some('r') => chars.push('\r'),
148                    Some('\\') => chars.push('\\'),
149                    Some('"') => chars.push('"'),
150                    Some(_) => {
151                        // Unknown escape sequence - return error
152                        return Err(nom::Err::Error(nom::error::Error::new(
153                            remaining,
154                            nom::error::ErrorKind::Char,
155                        )));
156                    }
157                    None => {
158                        // Incomplete escape sequence (backslash at end)
159                        return Err(nom::Err::Error(nom::error::Error::new(
160                            remaining,
161                            nom::error::ErrorKind::Char,
162                        )));
163                    }
164                }
165                // After consuming escape sequence, remaining is what the iterator has left
166                remaining = char_iter.as_str();
167            }
168            Some(ch) => {
169                // Regular character
170                chars.push(ch);
171                remaining = char_iter.as_str();
172            }
173            None => {
174                // Reached end of input without finding closing quote
175                return Err(nom::Err::Error(nom::error::Error::new(
176                    remaining,
177                    nom::error::ErrorKind::Char,
178                )));
179            }
180        }
181    }
182}
183
184/// Parse a list with configurable precompilation behavior (performance optimized)
185fn parse_list(
186    input: &str,
187    should_precompile: ShouldPrecompileOps,
188    depth: usize,
189) -> IResult<&str, Value> {
190    // Parse opening parenthesis and whitespace
191    let (input, _) = char('(').parse(input)?;
192    let (input, _) = multispace0.parse(input)?;
193
194    // Early quote detection to avoid backtracking
195    let (input, is_quote) = opt(tag("quote")).parse(input)?;
196
197    if is_quote.is_some() {
198        // Handle quote specially - parse exactly one more element unprecompiled
199        let (input, _) = multispace1.parse(input)?;
200        let (input, content) = parse_sexpr(input, ShouldPrecompileOps::No, depth + 1)?;
201        let (input, _) = multispace0.parse(input)?;
202        let (input, _) = char(')').parse(input)?;
203
204        // If precompilation is enabled, create a PrecompiledOp
205        if should_precompile == ShouldPrecompileOps::Yes {
206            let precompiled = create_quote_precompiled_op(&content);
207            return Ok((input, precompiled));
208        }
209
210        // Fallback to unprecompiled list representation (only when precompilation disabled)
211        return Ok((
212            input,
213            Value::List(vec![Value::Symbol("quote".into()), content]),
214        ));
215    }
216
217    // Regular list parsing - parse all elements in one pass
218    let (input, elements) = separated_list0(multispace1, |input| {
219        parse_sexpr(input, should_precompile, depth + 1)
220    })
221    .parse(input)?;
222
223    // Parse closing parenthesis and whitespace
224    let (input, _) = multispace0.parse(input)?;
225    let (input, _) = char(')').parse(input)?;
226
227    // Apply precompilation if enabled - single lookup, no repeated string comparison
228    if should_precompile == ShouldPrecompileOps::Yes
229        && let [Value::Symbol(op_name), args @ ..] = elements.as_slice()
230        && let Some(builtin_op) = find_scheme_op(op_name.as_str())
231    {
232        return Ok((
233            input,
234            Value::PrecompiledOp {
235                op: builtin_op,
236                op_id: builtin_op.scheme_id.into(),
237                args: args.to_vec(),
238            },
239        ));
240    }
241
242    Ok((input, Value::List(elements)))
243}
244
245/// Parse an S-expression with configurable precompilation behavior
246fn parse_sexpr(
247    input: &str,
248    should_precompile: ShouldPrecompileOps,
249    depth: usize,
250) -> IResult<&str, Value> {
251    if depth >= MAX_PARSE_DEPTH {
252        return Err(nom::Err::Error(nom::error::Error::new(
253            input,
254            nom::error::ErrorKind::TooLarge,
255        )));
256    }
257    preceded(
258        multispace0,
259        alt((
260            |input| parse_quote(input, should_precompile, depth), // Pass precompilation setting to quote
261            |input| parse_list(input, should_precompile, depth),
262            parse_number,
263            parse_bool,
264            parse_string,
265            parse_symbol,
266        )),
267    )
268    .parse(input)
269}
270
271/// Parse quoted expression ('expr -> (quote expr))
272fn parse_quote(
273    input: &str,
274    should_precompile: ShouldPrecompileOps,
275    depth: usize,
276) -> IResult<&str, Value> {
277    let (input, _) = char('\'').parse(input)?;
278    let (input, expr) = parse_sexpr(input, ShouldPrecompileOps::No, depth + 1)?; // Use unprecompiled parsing for quoted content
279
280    // Create PrecompiledOp only if precompilation is enabled
281    if should_precompile == ShouldPrecompileOps::Yes {
282        let precompiled = create_quote_precompiled_op(&expr);
283        return Ok((input, precompiled));
284    }
285
286    // Fallback to unprecompiled representation (only when precompilation disabled)
287    Ok((
288        input,
289        Value::List(vec![Value::Symbol("quote".into()), expr]),
290    ))
291}
292
293/// Parse a complete S-expression from input with optimization enabled
294pub fn parse_scheme(input: &str) -> Result<Value, Error> {
295    match terminated(
296        |input| parse_sexpr(input, ShouldPrecompileOps::Yes, 0),
297        multispace0,
298    )
299    .parse(input)
300    {
301        Ok(("", value)) => {
302            // After successful parsing, validate arity for any PrecompiledOp
303            validate_arity_in_ast(&value)?;
304            Ok(value)
305        }
306        Ok((remaining, _)) => Err(Error::ParseError(format!(
307            "Unexpected remaining input: '{remaining}'"
308        ))),
309        Err(e) => Err(Error::ParseError(parse_error_to_message(input, e))),
310    }
311}
312
313/// Recursively validate arity in parsed AST - simpler than threading through parser
314fn validate_arity_in_ast(value: &Value) -> Result<(), Error> {
315    match value {
316        Value::PrecompiledOp { op, args, .. } => {
317            // Validate this operation's arity
318            if let Err(Error::ArityError { expected, got, .. }) = op.validate_arity(args.len()) {
319                return Err(Error::arity_error_with_expr(
320                    expected,
321                    got,
322                    format!("{}", value.to_uncompiled_form()),
323                ));
324            }
325            // Recursively validate nested expressions
326            for arg in args {
327                validate_arity_in_ast(arg)?;
328            }
329        }
330        Value::List(elements) => {
331            // Recursively validate list elements
332            for element in elements {
333                validate_arity_in_ast(element)?;
334            }
335        }
336        _ => {} // Other value types don't need validation
337    }
338    Ok(())
339}
340
341#[cfg(test)]
342#[expect(clippy::unwrap_used)] // test code OK
343mod tests {
344    use super::*;
345    use crate::ast::{nil, sym, val};
346
347    /// Test result variants for comprehensive parsing tests
348    #[derive(Debug)]
349    enum ParseTestResult {
350        Success(Value), // Parsing should succeed with this value
351        SuccessPrecompiledOp(&'static str, Vec<Value>), // Should succeed with PrecompiledOp(scheme_id, args)
352        SemanticallyEquivalent(Value), // Should succeed and be semantically equivalent (for quote shorthand)
353        SpecificError(&'static str),   // Parsing should fail with error containing this string
354        Error,                         // Parsing should fail (any error)
355    }
356    use ParseTestResult::*;
357
358    /// Helper for successful parse test cases
359    fn success<T: Into<Value>>(value: T) -> ParseTestResult {
360        Success(value.into())
361    }
362
363    /// Helper for PrecompiledOp test cases
364    fn precompiled_op(scheme_id: &'static str, args: Vec<Value>) -> ParseTestResult {
365        SuccessPrecompiledOp(scheme_id, args)
366    }
367
368    /// Helper for SemanticallyEquivalent test cases (used for quote shorthand)
369    fn semantically_equivalent<T: Into<Value>>(value: T) -> ParseTestResult {
370        SemanticallyEquivalent(value.into())
371    }
372
373    /// Run comprehensive parse tests with simplified error reporting and round-trip validation
374    fn run_parse_tests(test_cases: Vec<(&str, ParseTestResult)>) {
375        for (i, (input, expected)) in test_cases.iter().enumerate() {
376            let test_id = format!("Parse test #{}", i + 1);
377            let result = parse_scheme(input);
378
379            match (result, expected) {
380                // Success cases with round-trip testing
381                (Ok(actual), Success(expected_val)) => {
382                    assert_eq!(actual, *expected_val, "{test_id}: value mismatch");
383
384                    // Test round-trip: display -> parse -> display should be identical
385                    let displayed = format!("{actual}");
386                    let reparsed = parse_scheme(&displayed).unwrap_or_else(|e| {
387                        panic!("{test_id}: round-trip parse failed for '{displayed}': {e:?}")
388                    });
389                    let redisplayed = format!("{reparsed}");
390                    assert_eq!(
391                        displayed, redisplayed,
392                        "{test_id}: round-trip display mismatch for '{input}'"
393                    );
394                }
395
396                (Ok(actual), SuccessPrecompiledOp(expected_scheme_id, expected_args)) => {
397                    if let Value::PrecompiledOp { op_id, args, .. } = &actual {
398                        assert_eq!(op_id, expected_scheme_id, "{test_id}: scheme_id mismatch");
399                        assert_eq!(args, expected_args, "{test_id}: args mismatch");
400
401                        // Test round-trip for PrecompiledOp
402                        let displayed = format!("{actual}");
403                        let reparsed = parse_scheme(&displayed).unwrap_or_else(|e| {
404                            panic!("{test_id}: round-trip parse failed for '{displayed}': {e:?}")
405                        });
406                        let redisplayed = format!("{reparsed}");
407                        assert_eq!(
408                            displayed, redisplayed,
409                            "{test_id}: round-trip display mismatch for '{input}'"
410                        );
411                    } else {
412                        panic!("{test_id}: expected PrecompiledOp, got {actual:?}");
413                    }
414                }
415
416                (Ok(actual), SemanticallyEquivalent(expected_val)) => {
417                    // For semantically equivalent cases, we compare the uncompiled forms
418                    // This is useful for quote shorthand ('foo vs (quote foo))
419                    let actual_uncompiled = actual.to_uncompiled_form();
420                    assert_eq!(
421                        actual_uncompiled, *expected_val,
422                        "{test_id}: semantic equivalence mismatch"
423                    );
424
425                    // Test round-trip for SemanticallyEquivalent
426                    let displayed = format!("{actual}");
427                    let reparsed = parse_scheme(&displayed).unwrap_or_else(|e| {
428                        panic!("{test_id}: round-trip parse failed for '{displayed}': {e:?}")
429                    });
430                    let redisplayed = format!("{reparsed}");
431                    assert_eq!(
432                        displayed, redisplayed,
433                        "{test_id}: round-trip display mismatch for '{input}'"
434                    );
435                }
436
437                // Error cases (success)
438                (Err(_), Error) => {} // Generic error case passes
439                (Err(err), SpecificError(expected_text)) => {
440                    let error_msg = format!("{err:?}");
441                    assert!(
442                        error_msg.contains(expected_text),
443                        "{test_id}: error should contain '{expected_text}'"
444                    );
445                }
446
447                // Mismatched cases (failures)
448                (Ok(actual), Error) => {
449                    panic!("{test_id}: expected error, got {actual:?}");
450                }
451                (Ok(actual), SpecificError(expected_text)) => {
452                    panic!(
453                        "{test_id}: expected error containing '{expected_text}', got {actual:?}"
454                    );
455                }
456                (Err(err), Success(_)) => {
457                    panic!("{test_id}: expected success, got error {err:?}");
458                }
459                (Err(err), SuccessPrecompiledOp(_, _)) => {
460                    panic!("{test_id}: expected PrecompiledOp, got error {err:?}");
461                }
462                (Err(err), SemanticallyEquivalent(_)) => {
463                    panic!("{test_id}: expected SemanticallyEquivalent, got error {err:?}");
464                }
465            }
466        }
467    }
468
469    #[test]
470    #[expect(clippy::too_many_lines)] // Comprehensive test coverage is intentionally thorough
471    fn test_parser_comprehensive() {
472        use crate::builtinops::find_scheme_op;
473
474        let test_cases = vec![
475            // ===== NUMBER PARSING =====
476            // Decimal numbers
477            ("42", success(42)),
478            ("-5", success(-5)),
479            ("0", success(0)),
480            ("-0", success(0)),
481            // Hexadecimal numbers
482            ("#x1A", success(26)),
483            ("#X1a", success(26)), // Test case-insensitivity
484            ("#xff", success(255)),
485            ("#x0", success(0)),
486            ("#x12345", success(74565)),
487            // Edge cases - large integer literals
488            ("9223372036854775807", success(i64::MAX)),
489            ("-9223372036854775808", success(i64::MIN)),
490            // Number parsing failures
491            ("3.14", Error),                  // Floating point should fail
492            ("#xG", Error),                   // Invalid hexadecimal should fail
493            ("#x", Error),                    // Incomplete hex should fail
494            ("#y123", Error),                 // Invalid hex prefix should fail
495            ("123abc", Error),                // Mixed should fail
496            ("99999999999999999999", Error),  // Too large for NumberType
497            ("-99999999999999999999", Error), // Too small for NumberType
498            // ===== SYMBOL PARSING =====
499            // Basic symbols
500            ("foo", success(sym("foo"))),
501            ("+", success(sym("+"))),
502            (">=", success(sym(">="))),
503            // Test all allowed special characters
504            ("test-name", success(sym("test-name"))),
505            ("test*name", success(sym("test*name"))),
506            ("test/name", success(sym("test/name"))),
507            ("test<name", success(sym("test<name"))),
508            ("test=name", success(sym("test=name"))),
509            ("test>name", success(sym("test>name"))),
510            ("test!name", success(sym("test!name"))),
511            ("test?name", success(sym("test?name"))),
512            ("test_name", success(sym("test_name"))),
513            ("test$name", success(sym("test$name"))),
514            // Alphanumeric combinations
515            ("var123", success(sym("var123"))),
516            ("-", success(sym("-"))),
517            ("-abc", success(sym("-abc"))),
518            // Invalid symbol tests - numbers at start cause parse error, or invalid chars
519            ("123var", Error),
520            ("-42name", Error),
521            ("test space", Error),
522            ("test@home", Error),
523            ("test#tag", Error),
524            ("test%percent", Error),
525            // ===== BOOLEAN PARSING =====
526            // Valid booleans
527            ("#t", success(true)),
528            ("#f", success(false)),
529            // Should fail - case sensitive
530            ("#T", Error),
531            ("#F", Error),
532            ("#true", Error),
533            ("#false", Error),
534            // ===== STRING PARSING =====
535            // Basic strings
536            ("\"hello\"", success("hello")),
537            ("\"hello world\"", success("hello world")),
538            // Test escape sequences (using raw strings for clarity)
539            (r#""hello\nworld""#, success("hello\nworld")),
540            (r#""tab\there""#, success("tab\there")),
541            (r#""carriage\rreturn""#, success("carriage\rreturn")),
542            (r#""quote\"test""#, success("quote\"test")),
543            (r#""backslash\\test""#, success("backslash\\test")),
544            // Test unknown escape sequences (should fail)
545            (r#""other\xchar""#, Error), // Unknown escape \x
546            (r#""test\zchar""#, Error),  // Unknown escape \z
547            // Test empty string
548            ("\"\"", success("")),
549            // Test unterminated string (should fail)
550            (r#""unterminated"#, Error),
551            (r#""unterminated\"#, Error), // ends with backslash
552            (r#""test\""#, Error),        // string with just escape at end
553            // ===== NIL PARSING =====
554            ("()", success(nil())),
555            // ===== LIST PARSING =====
556            // Single element list
557            ("(42)", success([42])),
558            // Regular list with mixed types
559            (
560                "(1 hello \"world\" #t)",
561                success([val(1), sym("hello"), val("world"), val(true)]),
562            ),
563            // Regular list (not a builtin operation)
564            ("(1 2 3)", success([1, 2, 3])),
565            // Test that builtin operations are parsed as PrecompiledOp
566            ("(+ 1 2)", precompiled_op("+", vec![val(1), val(2)])),
567            (
568                "(* 3 4 5)",
569                precompiled_op("*", vec![val(3), val(4), val(5)]),
570            ),
571            ("(< 1 2)", precompiled_op("<", vec![val(1), val(2)])),
572            (
573                "(if #t 1 2)",
574                precompiled_op("if", vec![val(true), val(1), val(2)]),
575            ),
576            // Test that non-builtin symbols still create regular lists
577            ("(foo 1 2)", success([sym("foo"), val(1), val(2)])),
578            // Test list with only symbols (should remain a list)
579            ("(a b c)", success([sym("a"), sym("b"), sym("c")])),
580            // Test list starting with number (should remain a list)
581            (
582                "(42 is the answer)",
583                success([val(42), sym("is"), sym("the"), sym("answer")]),
584            ),
585            // ===== QUOTE PARSING =====
586            // Quote shorthand - semantically equivalent to longhand
587            (
588                "'foo",
589                semantically_equivalent(val([sym("quote"), sym("foo")])),
590            ),
591            (
592                "'(1 2 3)",
593                semantically_equivalent(val([sym("quote"), val([1, 2, 3])])),
594            ),
595            ("'()", semantically_equivalent(val([sym("quote"), nil()]))),
596            // Longhand quote syntax - should create PrecompiledOp
597            ("(quote foo)", precompiled_op("quote", vec![sym("foo")])),
598            (
599                "(quote (1 2 3))",
600                precompiled_op("quote", vec![val([1, 2, 3])]),
601            ),
602            // ===== NESTED LIST PARSING =====
603            ("((1 2) (3 4))", success([[1, 2], [3, 4]])),
604            // Test nested lists with mixed builtins and regular lists
605            (
606                "((+ 1 2) (foo bar))",
607                success([
608                    val(Value::PrecompiledOp {
609                        op: find_scheme_op("+").unwrap(),
610                        op_id: "+".into(),
611                        args: vec![val(1), val(2)],
612                    }),
613                    val([sym("foo"), sym("bar")]),
614                ]),
615            ),
616            // Test nested expressions that should parse successfully
617            (
618                "(car (list 1 2 3))",
619                precompiled_op(
620                    "car",
621                    vec![val(Value::PrecompiledOp {
622                        op: find_scheme_op("list").unwrap(),
623                        op_id: "list".into(),
624                        args: vec![val(1), val(2), val(3)],
625                    })],
626                ),
627            ),
628            // ===== WHITESPACE HANDLING =====
629            // Test various whitespace scenarios
630            ("  42  ", success(42)),
631            ("\t#t\n", success(true)),
632            ("\r\n  foo  \t", success(sym("foo"))),
633            // Lists with various whitespace
634            ("( 1   2\t\n3 )", success([1, 2, 3])),
635            // Empty list with whitespace
636            ("(   )", success(nil())),
637            ("(\t\n)", success(nil())),
638            // ===== COMPLEX NESTED STRUCTURES =====
639            // Deeply nested lists
640            ("(((1)))", success([val([val([val(1)])])])),
641            // Mixed types in nested structure
642            (
643                "(foo (\"bar\" #t) -123)",
644                success([sym("foo"), val([val("bar"), val(true)]), val(-123)]),
645            ),
646            // ===== GENERAL ERROR CASES =====
647            // Mismatched parentheses
648            ("(1 2 3", Error), // Missing closing
649            ("1 2 3)", Error), // Extra closing
650            ("((1 2)", Error), // Nested missing closing
651            // Empty input
652            ("", Error),
653            ("   ", Error), // Just whitespace
654            // Invalid characters at start
655            (")", Error),
656            ("@invalid", Error),
657            // Multiple expressions (should fail for main parse function)
658            ("1 2", Error),
659            ("(+ 1 2) (+ 3 4)", Error),
660            // ===== PARSE-TIME ARITY ERRORS =====
661            // Special forms with strict arity requirements
662
663            // SCHEME-JSONLOGIC-STRICT: Require exactly 3 arguments
664            // (Scheme allows 2 args with undefined behavior, JSONLogic allows chaining with >3 args)
665            // 'if' requires exactly 3 arguments
666            ("(if #t 1)", SpecificError("ArityError")), // Too few args
667            ("(if #t 42 0 extra)", SpecificError("ArityError")), // Too many args
668            ("(if)", SpecificError("ArityError")),      // No args
669            // 'and' requires at least 1 argument
670            ("(and)", SpecificError("ArityError")), // No args
671            // 'or' requires at least 1 argument
672            ("(or)", SpecificError("ArityError")), // No args
673            // 'not' requires exactly 1 argument
674            ("(not)", SpecificError("ArityError")), // No args
675            ("(not #t #f)", SpecificError("ArityError")), // Too many args
676            // 'car' requires exactly 1 argument
677            ("(car)", SpecificError("ArityError")), // No args
678            ("(car (list 1 2) extra)", SpecificError("ArityError")), // Too many args
679            // 'cdr' requires exactly 1 argument
680            ("(cdr)", SpecificError("ArityError")), // No args
681            ("(cdr (list 1 2) extra)", SpecificError("ArityError")), // Too many args
682            // Valid cases with correct arity should parse as PrecompiledOps
683            (
684                "(if #t 1 2)",
685                precompiled_op("if", vec![val(true), val(1), val(2)]),
686            ),
687            (
688                "(and #t #f)",
689                precompiled_op("and", vec![val(true), val(false)]),
690            ),
691            (
692                "(or #f #t)",
693                precompiled_op("or", vec![val(false), val(true)]),
694            ),
695            ("(not #f)", precompiled_op("not", vec![val(false)])),
696            // Test nested arity errors are also caught
697            ("(list (not) 42)", SpecificError("ArityError")),
698            // ===== PARSE-TIME SYNTAX ERRORS =====
699            // Unclosed parentheses - should contain parse error information
700            ("(+ 1 (- 2", SpecificError("ParseError")),
701            // Extra closing parentheses
702            ("1 2 3)", SpecificError("ParseError")),
703            ("(1 2))", SpecificError("ParseError")),
704            // Invalid starting characters
705            (")", SpecificError("ParseError")),
706            // ===== DEPTH LIMIT TESTS =====
707            // Test that deeply nested expressions are rejected (MAX_PARSE_DEPTH = 32)
708            // This prevents stack overflow attacks from deeply nested structures
709            ("@invalid", SpecificError("ParseError")),
710            // Empty or whitespace-only input
711            ("", SpecificError("ParseError")),
712            ("   ", SpecificError("ParseError")),
713            ("\t\n", SpecificError("ParseError")),
714            // Multiple top-level expressions (not supported)
715            ("1 2", SpecificError("ParseError")),
716            ("(+ 1 2) (+ 3 4)", SpecificError("ParseError")),
717            ("42 #t", SpecificError("ParseError")),
718            // Valid expressions should parse successfully
719            ("symbol", success(sym("symbol"))), // Raw symbol, not quoted
720        ];
721
722        run_parse_tests(test_cases);
723    }
724
725    #[test]
726    fn test_parser_depth_limits() {
727        // Create depth limit test strings
728        let parens_under_limit = format!(
729            "{}unbound{}",
730            "(".repeat(MAX_PARSE_DEPTH - 1),
731            ")".repeat(MAX_PARSE_DEPTH - 1)
732        );
733        let quotes_under_limit = format!("{}unbound", "'".repeat(MAX_PARSE_DEPTH - 1));
734        let deep_parens_at_limit = format!(
735            "{}1{}",
736            "(".repeat(MAX_PARSE_DEPTH),
737            ")".repeat(MAX_PARSE_DEPTH)
738        );
739        let deep_quotes_at_limit = format!("{}a", "'".repeat(MAX_PARSE_DEPTH));
740
741        let depth_test_cases = vec![
742            // At/over limit should fail at parse time with specific error
743            (
744                deep_parens_at_limit.as_str(),
745                SpecificError("Invalid syntax"),
746            ),
747            (
748                deep_quotes_at_limit.as_str(),
749                SpecificError("Invalid syntax"),
750            ),
751        ];
752
753        run_parse_tests(depth_test_cases);
754
755        // Verify that expressions just under the limit parse successfully
756        assert!(
757            parse_scheme(&parens_under_limit).is_ok(),
758            "Parens just under depth limit should parse successfully"
759        );
760        assert!(
761            parse_scheme(&quotes_under_limit).is_ok(),
762            "Quotes just under depth limit should parse successfully"
763        );
764    }
765}
rulesxp/scheme.rs

rulesxp/
scheme.rs