grift_parser/lib.rs
1#![no_std]
2#![forbid(unsafe_code)]
3
4//! # Lisp Parser
5//!
6//! A classic Lisp parser with arena-allocated values.
7//!
8//! ## Design
9//!
10//! - Symbols use contiguous string storage for memory efficiency
11//! - Symbol interning ensures the same symbol name returns the same index
12//! - All values are stored in a `pwn_arena` arena
13//! - Supports garbage collection via the `Trace` trait
14//! - Explicit boolean values (#t, #f) separate from nil/empty list
15//! - **Strict evaluation** - All arguments are evaluated before function application (call-by-value)
16//!
17//! ## Value Representation
18//!
19//! - `Nil` - The empty list (NOT false!)
20//! - `True` - Boolean true (#t)
21//! - `False` - Boolean false (#f)
22//! - `Number(isize)` - Integer numbers (exact)
23//! - `Float(f64)` - Floating-point numbers (inexact)
24//! - `Char(char)` - Single character
25//! - `Cons { car, cdr }` - Pair/list cell
26//! - `Symbol { chars }` - Symbol with contiguous string storage
27//! - `Lambda { params, body, env }` - Closure
28//! - `Builtin(Builtin)` - Optimized built-in function
29//! - `StdLib(StdLib)` - Standard library function (static code, parsed on-demand)
30//!
31//! ## Reserved Slots
32//!
33//! The first 4 slots of the arena are reserved:
34//! - Slot 0: `Value::Nil` - the empty list singleton
35//! - Slot 1: `Value::True` - boolean true singleton
36//! - Slot 2: `Value::False` - boolean false singleton
37//! - Slot 3: `Value::Cons` - intern table reference cell
38//!
39//! ## Pitfalls and Gotchas
40//!
41//! ### Truthiness
42//! - **Only `#f` is false!** Everything else is truthy, including:
43//! - `nil` / `'()` (the empty list)
44//! - `0` (the number zero)
45//! - Empty strings
46//!
47//! ### Garbage Collection
48//! - The intern table is always a GC root - interned symbols are never collected
49//! - Reserved slots (nil, true, false) are implicitly preserved
50//! - Run `gc()` with appropriate roots to reclaim memory
51//!
52//! ### StdLib Functions
53//! - Body is parsed on each call (minor overhead, but keeps code out of arena)
54//! - Recursive stdlib functions work via the global environment
55//! - Errors in static source strings are only caught at runtime
56
57pub use pwn_arena::{Arena, ArenaIndex, ArenaError, ArenaResult, Trace, GcStats};
58
59/// Macro for defining built-in functions.
60///
61/// This macro generates the `Builtin` enum, its `name()` method, and the `ALL` constant
62/// from a single declarative definition. To add a new builtin, simply add a new entry
63/// to the macro invocation (and implement its evaluation in grift_eval).
64///
65/// # Syntax
66///
67/// ```rust
68/// use grift_parser::define_builtins;
69/// define_builtins! {
70/// /// Documentation comment
71/// VariantName => "lisp-name",
72/// // ... more builtins
73/// }
74/// ```
75///
76/// # Example
77///
78/// To add a new builtin `my-builtin`:
79///
80/// ```rust
81/// use grift_parser::define_builtins;
82/// define_builtins! {
83/// // ... existing builtins ...
84/// /// (my-builtin x) - Does something with x
85/// MyBuiltin => "my-builtin",
86/// }
87/// ```
88///
89/// Note: After adding a builtin here, you must also implement its evaluation
90/// logic in the `grift_eval` crate.
91#[macro_export]
92macro_rules! define_builtins {
93 (
94 $(
95 $(#[$attr:meta])*
96 $variant:ident => $name:literal
97 ),* $(,)?
98 ) => {
99 /// Built-in functions (optimization to avoid symbol lookup)
100 ///
101 /// NOTE: This Lisp supports mutation via set!, set-car!, and set-cdr!
102 /// - Mutation operations break referential transparency
103 /// - All evaluation is call-by-value (strict)
104 ///
105 /// # Adding New Builtins
106 ///
107 /// To add a new builtin:
108 /// 1. Add an entry to the `define_builtins!` macro invocation
109 /// 2. Implement its evaluation logic in `grift_eval`
110 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
111 pub enum Builtin {
112 $(
113 $(#[$attr])*
114 $variant,
115 )*
116 }
117
118 impl Builtin {
119 /// Get the symbol name for this builtin
120 pub const fn name(&self) -> &'static str {
121 match self {
122 $(
123 Builtin::$variant => $name,
124 )*
125 }
126 }
127
128 /// All builtins for initialization
129 pub const ALL: &'static [Builtin] = &[
130 $(
131 Builtin::$variant,
132 )*
133 ];
134 }
135 };
136}
137
138// Define all built-in functions using the macro.
139// To add a new builtin, add an entry here and implement its evaluation in grift_eval.
140define_builtins! {
141 // List operations
142 /// car - Get first element of pair
143 Car => "car",
144 /// cdr - Get second element of pair
145 Cdr => "cdr",
146 /// cons - Create a pair
147 Cons => "cons",
148 /// list - Create a list from arguments
149 List => "list",
150
151 // Predicates (Scheme R7RS compliant)
152 /// null? - Check if value is the empty list
153 Null => "null?",
154 /// pair? - Check if value is a pair
155 Pairp => "pair?",
156 /// number? - Check if value is a number
157 Numberp => "number?",
158 /// boolean? - Check if value is a boolean
159 Booleanp => "boolean?",
160 /// procedure? - Check if value is a procedure
161 Procedurep => "procedure?",
162 /// symbol? - Check if value is a symbol
163 Symbolp => "symbol?",
164 /// eq? - Scheme-compliant identity equality
165 EqP => "eq?",
166 /// eqv? - Scheme-compliant value equality
167 EqvP => "eqv?",
168 /// equal? - Scheme-compliant recursive structural equality
169 EqualP => "equal?",
170
171 // Arithmetic
172 /// + - Addition
173 Add => "+",
174 /// - - Subtraction
175 Sub => "-",
176 /// * - Multiplication
177 Mul => "*",
178 /// / - Division
179 Div => "/",
180 /// modulo - Scheme modulo (result has sign of divisor)
181 Modulo => "modulo",
182 /// remainder - Scheme remainder (result has sign of dividend)
183 Remainder => "remainder",
184 /// quotient - Integer quotient (truncated towards zero)
185 Quotient => "quotient",
186 /// abs - Absolute value
187 Abs => "abs",
188 /// max - Maximum of numbers
189 Max => "max",
190 /// min - Minimum of numbers
191 Min => "min",
192 /// gcd - Greatest common divisor
193 Gcd => "gcd",
194 /// lcm - Least common multiple
195 Lcm => "lcm",
196 /// expt - Exponentiation
197 Expt => "expt",
198 /// square - Square of a number
199 Square => "square",
200
201 // Numeric predicates
202 /// zero? - Check if number is zero
203 Zerop => "zero?",
204 /// positive? - Check if number is positive
205 Positivep => "positive?",
206 /// negative? - Check if number is negative
207 Negativep => "negative?",
208 /// odd? - Check if number is odd
209 Oddp => "odd?",
210 /// even? - Check if number is even
211 Evenp => "even?",
212 /// integer? - Check if value is an integer
213 Integerp => "integer?",
214 /// exact? - Check if number is exact (always true for integers)
215 Exactp => "exact?",
216 /// inexact? - Check if number is inexact (always false for integers)
217 Inexactp => "inexact?",
218 /// exact-integer? - Check if value is an exact integer
219 ExactIntegerp => "exact-integer?",
220
221 // Rounding operations (R7RS Section 6.2.6) - Identity for integers
222 /// floor - Largest integer not greater than x (identity for integers)
223 Floor => "floor",
224 /// ceiling - Smallest integer not less than x (identity for integers)
225 Ceiling => "ceiling",
226 /// truncate - Integer closest to x whose absolute value is not larger (identity for integers)
227 Truncate => "truncate",
228 /// round - Closest integer to x, rounding to even when x is halfway (identity for integers)
229 Round => "round",
230
231 // Comparison
232 /// < - Less than
233 Lt => "<",
234 /// > - Greater than
235 Gt => ">",
236 /// <= - Less than or equal
237 Le => "<=",
238 /// >= - Greater than or equal
239 Ge => ">=",
240 /// = - Numeric equality
241 NumEq => "=",
242
243 // Boolean operations
244 /// not - Boolean negation
245 Not => "not",
246
247 // I/O
248 /// newline - Print a newline
249 Newline => "newline",
250 /// display - Print value without quotes
251 Display => "display",
252
253 // Error handling
254 /// error - Raise an error
255 Error => "error",
256
257 // Mutation operations
258 /// set-car! - Mutate car of pair
259 SetCar => "set-car!",
260 /// set-cdr! - Mutate cdr of pair
261 SetCdr => "set-cdr!",
262
263 // Array operations (O(1) indexed access and mutation) - Embedded extension
264 /// make-array - Create an array with given length and initial value
265 MakeArray => "make-array",
266 /// array-ref - Get element at index (O(1))
267 ArrayRef => "array-ref",
268 /// array-set! - Set element at index (O(1))
269 ArraySet => "array-set!",
270 /// array-length - Get array length (O(1))
271 ArrayLength => "array-length",
272 /// array? - Check if value is an array
273 Arrayp => "array?",
274
275 // Vector operations (R7RS Section 6.8)
276 // Vectors use the same underlying representation as arrays
277 /// vector? - Check if value is a vector
278 Vectorp => "vector?",
279 /// make-vector - Create a vector with optional fill value
280 MakeVector => "make-vector",
281 /// vector - Create vector from arguments
282 Vector => "vector",
283 /// vector-length - Get length of vector
284 VectorLength => "vector-length",
285 /// vector-ref - Get element at index
286 VectorRef => "vector-ref",
287 /// vector-set! - Set element at index
288 VectorSet => "vector-set!",
289 /// vector->list - Convert vector to list
290 VectorToList => "vector->list",
291 /// list->vector - Convert list to vector
292 ListToVector => "list->vector",
293 /// vector-fill! - Fill vector with value
294 VectorFill => "vector-fill!",
295 /// vector-copy - Copy a vector
296 VectorCopy => "vector-copy",
297
298 // Character operations (R7RS Section 6.6)
299 /// char? - Check if value is a character
300 Charp => "char?",
301 /// char=? - Character equality
302 CharEq => "char=?",
303 /// char<? - Character less than
304 CharLt => "char<?",
305 /// char>? - Character greater than
306 CharGt => "char>?",
307 /// char<=? - Character less than or equal
308 CharLe => "char<=?",
309 /// char>=? - Character greater than or equal
310 CharGe => "char>=?",
311 /// char->integer - Convert character to its Unicode code point
312 CharToInteger => "char->integer",
313 /// integer->char - Convert Unicode code point to character
314 IntegerToChar => "integer->char",
315 /// char-upcase - Convert character to uppercase
316 CharUpcase => "char-upcase",
317 /// char-downcase - Convert character to lowercase
318 CharDowncase => "char-downcase",
319
320 // String operations (R7RS Section 6.7)
321 /// string? - Check if value is a string
322 Stringp => "string?",
323 /// make-string - Create a string of given length
324 MakeString => "make-string",
325 /// string - Create string from characters
326 String => "string",
327 /// string-length - Get length of string
328 StringLength => "string-length",
329 /// string-ref - Get character at index
330 StringRef => "string-ref",
331 /// string-set! - Set character at index
332 StringSet => "string-set!",
333 /// string=? - String equality
334 StringEq => "string=?",
335 /// string<? - String less than
336 StringLt => "string<?",
337 /// string>? - String greater than
338 StringGt => "string>?",
339 /// string<=? - String less than or equal
340 StringLe => "string<=?",
341 /// string>=? - String greater than or equal
342 StringGe => "string>=?",
343 /// string-append - Concatenate strings
344 StringAppend => "string-append",
345 /// string->list - Convert string to list of characters
346 StringToList => "string->list",
347 /// list->string - Convert list of characters to string
348 ListToString => "list->string",
349 /// substring - Extract a substring
350 Substring => "substring",
351 /// string-copy - Copy a string
352 StringCopy => "string-copy",
353
354 // Garbage collection and arena control
355 /// gc - Manually trigger garbage collection
356 Gc => "gc",
357 /// gc-enable - Enable automatic garbage collection
358 GcEnable => "gc-enable",
359 /// gc-disable - Disable automatic garbage collection
360 GcDisable => "gc-disable",
361 /// gc-enabled? - Check if GC is enabled
362 GcEnabledP => "gc-enabled?",
363 /// arena-stats - Get arena statistics as a list
364 ArenaStats => "arena-stats",
365}
366
367/// Macro for defining standard library functions.
368///
369/// This macro generates the `StdLib` enum, its implementation, and the `ALL` constant
370/// from a single declarative definition. To add a new stdlib function, simply add
371/// a new entry to the macro invocation.
372///
373/// # Syntax
374///
375/// ```rust
376/// use grift_parser::define_stdlib;
377/// define_stdlib! {
378/// /// Documentation comment
379/// VariantName("function-name", ["param1", "param2"], "lisp-body-code"),
380/// // ... more functions
381/// }
382/// ```
383///
384/// # Example
385///
386/// To add a new function `(my-func x y)` that returns `(+ x y)`:
387///
388/// ```rust
389/// use grift_parser::define_stdlib;
390/// define_stdlib! {
391/// // ... existing functions ...
392/// /// (my-func x y) - Add two numbers
393/// MyFunc("my-func", ["x", "y"], "(+ x y)"),
394/// }
395/// ```
396#[macro_export]
397macro_rules! define_stdlib {
398 (
399 $(
400 $(#[$attr:meta])*
401 $variant:ident($name:literal, [$($param:literal),* $(,)?], $body:literal)
402 ),* $(,)?
403 ) => {
404 /// Standard library functions (stored in static memory, not arena)
405 ///
406 /// These functions are defined as Lisp code in static strings and are parsed
407 /// on-demand when called. This provides:
408 /// - Zero arena cost for function definitions (static strings)
409 /// - Easy maintenance (just add entries to the `define_stdlib!` macro)
410 /// - Simple implementation (no build scripts needed)
411 ///
412 /// The parsing overhead is minimal since:
413 /// 1. Standard library functions are typically called frequently (can optimize)
414 /// 2. Parsing is fast (simple recursive descent)
415 /// 3. Parsed AST is temporary and GC'd after evaluation
416 ///
417 /// # Memory Layout
418 ///
419 /// Each StdLib variant stores references to static data:
420 /// - Function name (for lookup and debugging)
421 /// - Parameter names (static slice)
422 /// - Body source code (static string, parsed on each call)
423 ///
424 /// # Adding New Functions
425 ///
426 /// To add a new stdlib function, add an entry to the `define_stdlib!` macro
427 /// invocation. No other code changes are needed.
428 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
429 pub enum StdLib {
430 $(
431 $(#[$attr])*
432 $variant,
433 )*
434 }
435
436 impl StdLib {
437 /// Get the function name
438 pub const fn name(&self) -> &'static str {
439 match self {
440 $(
441 StdLib::$variant => $name,
442 )*
443 }
444 }
445
446 /// Get the parameter names for this function
447 pub const fn params(&self) -> &'static [&'static str] {
448 match self {
449 $(
450 StdLib::$variant => &[$($param),*],
451 )*
452 }
453 }
454
455 /// Get the body source code (Lisp expression as static string)
456 ///
457 /// This string is parsed on each call to the function.
458 /// The parsed AST is temporary and GC'd after evaluation.
459 pub const fn body(&self) -> &'static str {
460 match self {
461 $(
462 StdLib::$variant => $body,
463 )*
464 }
465 }
466
467 /// All standard library functions for initialization
468 pub const ALL: &'static [StdLib] = &[
469 $(
470 StdLib::$variant,
471 )*
472 ];
473 }
474 };
475}
476
477// Define all standard library functions using the include_stdlib! macro.
478// This macro reads the stdlib.scm file and generates the StdLib enum.
479// To add a new function, simply add a new entry in stdlib.scm.
480// Note: member/assoc use eq? for comparison (like Scheme's memq/assq).
481// This works for symbols and identical objects. For value comparison,
482// define a custom function or use fold with a predicate.
483grift_macros::include_stdlib!("src/stdlib.scm");
484
485/// A Lisp value
486#[derive(Clone, Copy, Debug, PartialEq)]
487pub enum Value {
488 /// The empty list (NOT false - use False for that)
489 Nil,
490
491 /// Boolean true (#t)
492 True,
493
494 /// Boolean false (#f) - the ONLY false value
495 False,
496
497 /// Integer number (exact)
498 Number(isize),
499
500 /// Floating-point number (inexact)
501 Float(f64),
502
503 /// Single character (used in strings and symbol storage)
504 Char(char),
505
506 /// Cons cell (pair)
507 Cons {
508 car: ArenaIndex,
509 cdr: ArenaIndex,
510 },
511
512 /// Symbol (contains a contiguous string)
513 /// The `chars` field points to a Value::String which contains the symbol name.
514 /// The length is obtained from the String value, providing a single source of truth.
515 Symbol {
516 chars: ArenaIndex,
517 },
518
519 /// Lambda / closure (memory-optimized)
520 ///
521 /// To minimize enum size, lambda data is stored as a linked list in the arena:
522 /// `data` points to a cons cell `(params . (body . env))` where:
523 /// - `params`: List of parameter symbols
524 /// - `body`: Expression to evaluate
525 /// - `env`: Captured environment (alist)
526 ///
527 /// Use `Lisp::lambda()` to create and `Lisp::lambda_parts()` to extract.
528 Lambda {
529 data: ArenaIndex, // Points to (params . (body . env))
530 },
531
532 /// Built-in function (optimized)
533 Builtin(Builtin),
534
535 /// Standard library function (stored in static memory with caching)
536 ///
537 /// Unlike Lambda which stores code in the arena, StdLib references static
538 /// function definitions. The function body is parsed on first call and cached,
539 /// providing good performance while minimizing initial arena cost.
540 ///
541 /// # Memory Efficiency
542 ///
543 /// - Function definitions are in static memory (const strings)
544 /// - Parsed body and params are cached on first call
545 /// - Subsequent calls reuse the cached parsed AST
546 ///
547 /// # Cache Field
548 ///
549 /// - `cache`: NULL until first call, then points to `(body . params)` cons cell
550 ///
551 /// Use `Lisp::stdlib()` to create and `Lisp::stdlib_cache()` to access cache.
552 StdLib {
553 func: StdLib,
554 cache: ArenaIndex, // NULL = not yet parsed, else (body . params)
555 },
556
557 /// Array (contiguous storage of values in the arena)
558 ///
559 /// Arrays store values contiguously in the arena, similar to how symbols
560 /// store characters. This provides O(1) indexed access and mutation.
561 ///
562 /// # Memory Layout
563 ///
564 /// - `data` points to the first element in contiguous storage
565 /// - Elements are stored at data+0, data+1, ..., data+(len-1)
566 ///
567 /// # Example
568 ///
569 /// ```lisp
570 /// (define arr (make-array 3 0)) ; Create array of 3 zeros
571 /// (array-set! arr 1 42) ; Set index 1 to 42
572 /// (array-ref arr 1) ; => 42
573 /// (array-length arr) ; => 3
574 /// ```
575 Array {
576 data: ArenaIndex, // Points to first element in contiguous block
577 len: usize, // Number of elements
578 },
579
580 /// String (contiguous storage of Char values in the arena)
581 ///
582 /// Strings store characters contiguously in the arena, similar to arrays.
583 /// This provides O(1) indexed access and O(1) length lookup.
584 ///
585 /// # Memory Layout
586 ///
587 /// - `data` points to the first character in contiguous storage
588 /// - Characters are stored at data+0, data+1, ..., data+(len-1)
589 ///
590 /// # Example
591 ///
592 /// ```lisp
593 /// (string-length "hello") ; => 5
594 /// (string-ref "hello" 0) ; => #\h
595 /// ```
596 String {
597 data: ArenaIndex, // Points to first Char in contiguous block
598 len: usize, // Number of characters
599 },
600
601 /// Native function (Rust function callable from Lisp)
602 ///
603 /// Native functions are registered at runtime and identified by their ID.
604 /// The actual function pointer is stored in the evaluator's NativeRegistry.
605 ///
606 /// # Fields
607 ///
608 /// - `id`: Index into the NativeRegistry's entries array
609 /// - `name_hash`: Hash of the function name for quick comparison
610 Native {
611 id: usize, // Index in the NativeRegistry
612 name_hash: usize, // Hash for debugging/lookup verification
613 },
614}
615
616impl Value {
617 /// Check if this value is nil (empty list)
618 #[inline]
619 pub const fn is_nil(&self) -> bool {
620 matches!(self, Value::Nil)
621 }
622
623 /// Check if this value is false (#f)
624 /// This is the ONLY way to be false in this Lisp
625 #[inline]
626 pub const fn is_false(&self) -> bool {
627 matches!(self, Value::False)
628 }
629
630 /// Check if this value is true (#t)
631 #[inline]
632 pub const fn is_true(&self) -> bool {
633 matches!(self, Value::True)
634 }
635
636 /// Check if this value is a boolean (#t or #f)
637 #[inline]
638 pub const fn is_boolean(&self) -> bool {
639 matches!(self, Value::True | Value::False)
640 }
641
642 /// Check if this value is an atom (not a cons cell)
643 #[inline]
644 pub const fn is_atom(&self) -> bool {
645 !matches!(self, Value::Cons { .. })
646 }
647
648 /// Check if this value is a number (integer or float)
649 #[inline]
650 pub const fn is_number(&self) -> bool {
651 matches!(self, Value::Number(_) | Value::Float(_))
652 }
653
654 /// Check if this value is an integer
655 #[inline]
656 pub const fn is_integer(&self) -> bool {
657 matches!(self, Value::Number(_))
658 }
659
660 /// Check if this value is a float
661 #[inline]
662 pub const fn is_float(&self) -> bool {
663 matches!(self, Value::Float(_))
664 }
665
666 /// Check if this value is a symbol
667 #[inline]
668 pub const fn is_symbol(&self) -> bool {
669 matches!(self, Value::Symbol { .. })
670 }
671
672 /// Check if this value is a cons cell (pair)
673 #[inline]
674 pub const fn is_cons(&self) -> bool {
675 matches!(self, Value::Cons { .. })
676 }
677
678 /// Check if this value is a lambda
679 #[inline]
680 pub const fn is_lambda(&self) -> bool {
681 matches!(self, Value::Lambda { .. })
682 }
683
684 /// Check if this value is a builtin
685 #[inline]
686 pub const fn is_builtin(&self) -> bool {
687 matches!(self, Value::Builtin(_))
688 }
689
690 /// Check if this value is a stdlib function
691 #[inline]
692 pub const fn is_stdlib(&self) -> bool {
693 matches!(self, Value::StdLib { .. })
694 }
695
696 /// Check if this value is a native (Rust) function
697 #[inline]
698 pub const fn is_native(&self) -> bool {
699 matches!(self, Value::Native { .. })
700 }
701
702 /// Check if this value is a procedure (lambda, builtin, stdlib, or native function)
703 #[inline]
704 pub const fn is_procedure(&self) -> bool {
705 matches!(self, Value::Lambda { .. } | Value::Builtin(_) | Value::StdLib { .. } | Value::Native { .. })
706 }
707
708 /// Check if this value is an array
709 #[inline]
710 pub const fn is_array(&self) -> bool {
711 matches!(self, Value::Array { .. })
712 }
713
714 /// Check if this value is a string
715 #[inline]
716 pub const fn is_string(&self) -> bool {
717 matches!(self, Value::String { .. })
718 }
719
720 /// Get the number value if this is an integer
721 #[inline]
722 pub const fn as_number(&self) -> Option<isize> {
723 match self {
724 Value::Number(n) => Some(*n),
725 _ => None,
726 }
727 }
728
729 /// Get the float value if this is a float
730 #[inline]
731 pub const fn as_float(&self) -> Option<f64> {
732 match self {
733 Value::Float(f) => Some(*f),
734 _ => None,
735 }
736 }
737
738 /// Get any numeric value as f64
739 #[inline]
740 pub const fn as_f64(&self) -> Option<f64> {
741 match self {
742 Value::Number(n) => Some(*n as f64),
743 Value::Float(f) => Some(*f),
744 _ => None,
745 }
746 }
747
748 /// Get the char value if this is a char
749 #[inline]
750 pub const fn as_char(&self) -> Option<char> {
751 match self {
752 Value::Char(c) => Some(*c),
753 _ => None,
754 }
755 }
756
757 /// Get a human-readable type name
758 pub const fn type_name(&self) -> &'static str {
759 match self {
760 Value::Nil => "nil",
761 Value::True | Value::False => "boolean",
762 Value::Number(_) => "number",
763 Value::Float(_) => "number",
764 Value::Char(_) => "char",
765 Value::Cons { .. } => "pair",
766 Value::Symbol { .. } => "symbol",
767 Value::Lambda { .. } => "procedure",
768 Value::Builtin(_) => "procedure",
769 Value::StdLib { .. } => "procedure",
770 Value::Native { .. } => "native",
771 Value::Array { .. } => "array",
772 Value::String { .. } => "string",
773 }
774 }
775}
776
777/// Implement Trace for GC support
778impl<const N: usize> Trace<Value, N> for Value {
779 fn trace<F: FnMut(ArenaIndex)>(&self, mut tracer: F) {
780 match self {
781 Value::Nil | Value::True | Value::False |
782 Value::Number(_) | Value::Float(_) | Value::Char(_) | Value::Builtin(_) |
783 Value::Native { .. } => {
784 // No references
785 }
786 Value::StdLib { cache, .. } => {
787 // Trace cached cons cell (body . params) if it exists
788 if !cache.is_null() {
789 tracer(*cache);
790 }
791 }
792 Value::Cons { car, cdr } => {
793 tracer(*car);
794 tracer(*cdr);
795 }
796 Value::Symbol { chars } => {
797 // chars points to a Value::String, which handles its own tracing
798 tracer(*chars);
799 }
800 Value::Lambda { data } => {
801 // data points to (params . (body . env)), trace the whole structure
802 tracer(*data);
803 }
804 Value::Array { data, len } => {
805 // For non-empty arrays, trace all elements in the contiguous block
806 // Empty arrays (len == 0) have data == NULL, so skip tracing
807 if *len > 0 {
808 let base_idx = data.raw();
809 for i in 0..*len {
810 let elem_idx = ArenaIndex::new(base_idx + i);
811 tracer(elem_idx);
812 }
813 }
814 }
815 Value::String { data, len } => {
816 // For non-empty strings, trace all Char slots in the contiguous block
817 // Empty strings (len == 0) have data == NULL, so skip tracing
818 if *len > 0 {
819 let base_idx = data.raw();
820 for i in 0..*len {
821 let char_idx = ArenaIndex::new(base_idx + i);
822 tracer(char_idx);
823 }
824 }
825 }
826 }
827 }
828}
829
830// ============================================================================
831// Lisp Context - Arena wrapper with helper methods
832// ============================================================================
833
834/// A Lisp execution context wrapping an arena
835///
836/// ## Reserved Slots
837///
838/// The first 4 slots of the arena are reserved for singleton values:
839/// - Slot 0: `Value::Nil` - the empty list
840/// - Slot 1: `Value::True` - boolean true (#t)
841/// - Slot 2: `Value::False` - boolean false (#f)
842/// - Slot 3: `Value::Cons` - intern table reference cell (car = intern table root)
843///
844/// These slots are pre-allocated during `Lisp::new()` and returned as
845/// constants from `nil()`, `true_val()`, and `false_val()`. This optimization
846/// avoids allocating new slots for these frequently-used values.
847///
848/// ## Symbol Interning
849///
850/// All symbols are interned in an association list stored in the arena.
851/// The `intern_table_slot` field points to a cons cell whose car is the alist
852/// of `(string_index . symbol_index)` pairs. The cons cell is used as a
853/// "reference cell" to allow updating the intern table without RefCell.
854/// When creating a symbol, we first check if it already exists in the table.
855/// This ensures that the same symbol name always returns the same index.
856pub struct Lisp<const N: usize> {
857 arena: Arena<Value, N>,
858 /// Pre-allocated Nil slot (always slot 0)
859 nil_slot: ArenaIndex,
860 /// Pre-allocated True slot (always slot 1)
861 true_slot: ArenaIndex,
862 /// Pre-allocated False slot (always slot 2)
863 false_slot: ArenaIndex,
864 /// Intern table reference cell (always slot 3)
865 /// This is a cons cell where car = intern table root (alist)
866 /// Using a cons cell avoids needing RefCell for interior mutability
867 intern_table_slot: ArenaIndex,
868}
869
870/// Number of reserved slots in the arena (nil, true, false, intern_table_ref)
871pub const RESERVED_SLOTS: usize = 4;
872
873impl<const N: usize> Lisp<N> {
874 /// Create a new Lisp context
875 ///
876 /// Pre-allocates reserved slots for Nil, True, False, and intern table ref cell.
877 /// These slots (0, 1, 2, 3) are never freed and are returned as constants
878 /// from `nil()`, `true_val()`, and `false_val()`.
879 ///
880 /// The intern table is initialized to nil (empty alist).
881 ///
882 /// # Panics
883 ///
884 /// Panics if the arena capacity N < RESERVED_SLOTS, as we need at least 4 slots
885 /// for the reserved singleton values and intern table reference cell.
886 pub fn new() -> Self {
887 const { assert!(N >= RESERVED_SLOTS, "Lisp arena must have capacity >= RESERVED_SLOTS for reserved slots") };
888
889 let arena = Arena::new(Value::Nil);
890
891 // Pre-allocate reserved slots in order: Nil, True, False, InternTableRef
892 // These will be slots 0, 1, 2, 3 respectively
893 let nil_slot = arena.alloc(Value::Nil)
894 .expect("Failed to pre-allocate reserved Nil slot during Lisp initialization");
895 let true_slot = arena.alloc(Value::True)
896 .expect("Failed to pre-allocate reserved True slot during Lisp initialization");
897 let false_slot = arena.alloc(Value::False)
898 .expect("Failed to pre-allocate reserved False slot during Lisp initialization");
899
900 // Pre-allocate intern table reference cell (slot 3)
901 // This is a cons cell where car = intern table root (initially nil)
902 // Using a cons cell as a "reference cell" allows updating via set()
903 // instead of requiring RefCell for interior mutability
904 let intern_table_slot = arena.alloc(Value::Cons { car: nil_slot, cdr: nil_slot })
905 .expect("Failed to pre-allocate intern table reference cell during Lisp initialization");
906
907 Lisp {
908 arena,
909 nil_slot,
910 true_slot,
911 false_slot,
912 intern_table_slot,
913 }
914 }
915
916 /// Get reference to the underlying arena
917 pub fn arena(&self) -> &Arena<Value, N> {
918 &self.arena
919 }
920
921 /// Allocate a value
922 #[inline]
923 pub fn alloc(&self, value: Value) -> ArenaResult<ArenaIndex> {
924 self.arena.alloc(value)
925 }
926
927 /// Get a value
928 #[inline]
929 pub fn get(&self, index: ArenaIndex) -> ArenaResult<Value> {
930 self.arena.get(index)
931 }
932
933 /// Set a value
934 #[inline]
935 pub fn set(&self, index: ArenaIndex, value: Value) -> ArenaResult<()> {
936 self.arena.set(index, value)
937 }
938
939 /// Get an arena index at a given offset from a base index.
940 ///
941 /// This is useful for accessing elements in contiguous storage (strings, arrays).
942 #[inline]
943 pub fn arena_index_at_offset(&self, base: ArenaIndex, offset: usize) -> ArenaResult<ArenaIndex> {
944 self.arena.index_at_offset(base, offset)
945 }
946
947 /// Get the pre-allocated Nil singleton (empty list)
948 ///
949 /// This returns the reserved slot 0 which always contains `Value::Nil`.
950 /// No allocation is performed.
951 #[inline]
952 pub fn nil(&self) -> ArenaResult<ArenaIndex> {
953 Ok(self.nil_slot)
954 }
955
956 /// Get the pre-allocated True singleton (#t)
957 ///
958 /// This returns the reserved slot 1 which always contains `Value::True`.
959 /// No allocation is performed.
960 #[inline]
961 pub fn true_val(&self) -> ArenaResult<ArenaIndex> {
962 Ok(self.true_slot)
963 }
964
965 /// Get the pre-allocated False singleton (#f)
966 ///
967 /// This returns the reserved slot 2 which always contains `Value::False`.
968 /// No allocation is performed.
969 #[inline]
970 pub fn false_val(&self) -> ArenaResult<ArenaIndex> {
971 Ok(self.false_slot)
972 }
973
974 /// Allocate a boolean based on a Rust bool
975 #[inline]
976 pub fn boolean(&self, b: bool) -> ArenaResult<ArenaIndex> {
977 if b { self.true_val() } else { self.false_val() }
978 }
979
980 /// Allocate a number
981 #[inline]
982 pub fn number(&self, n: isize) -> ArenaResult<ArenaIndex> {
983 self.alloc(Value::Number(n))
984 }
985
986 /// Allocate a floating-point number
987 #[inline]
988 pub fn float(&self, f: f64) -> ArenaResult<ArenaIndex> {
989 self.alloc(Value::Float(f))
990 }
991
992 /// Allocate a character
993 #[inline]
994 pub fn char(&self, c: char) -> ArenaResult<ArenaIndex> {
995 self.alloc(Value::Char(c))
996 }
997
998 /// Allocate a cons cell
999 #[inline]
1000 pub fn cons(&self, car: ArenaIndex, cdr: ArenaIndex) -> ArenaResult<ArenaIndex> {
1001 self.alloc(Value::Cons { car, cdr })
1002 }
1003
1004 /// Get car of a cons cell
1005 ///
1006 /// In Scheme R7RS, car of an empty list is an error.
1007 #[inline]
1008 pub fn car(&self, index: ArenaIndex) -> ArenaResult<ArenaIndex> {
1009 match self.get(index)? {
1010 Value::Cons { car, .. } => Ok(car),
1011 // Scheme R7RS: car of empty list is an error
1012 Value::Nil => Err(ArenaError::InvalidIndex),
1013 _ => Err(ArenaError::InvalidIndex),
1014 }
1015 }
1016
1017 /// Get cdr of a cons cell
1018 ///
1019 /// In Scheme R7RS, cdr of an empty list is an error.
1020 #[inline]
1021 pub fn cdr(&self, index: ArenaIndex) -> ArenaResult<ArenaIndex> {
1022 match self.get(index)? {
1023 Value::Cons { cdr, .. } => Ok(cdr),
1024 // Scheme R7RS: cdr of empty list is an error
1025 Value::Nil => Err(ArenaError::InvalidIndex),
1026 _ => Err(ArenaError::InvalidIndex),
1027 }
1028 }
1029
1030 /// Set car of a cons cell (mutation operation)
1031 /// Returns the new value on success
1032 #[inline]
1033 pub fn set_car(&self, index: ArenaIndex, new_car: ArenaIndex) -> ArenaResult<ArenaIndex> {
1034 match self.get(index)? {
1035 Value::Cons { cdr, .. } => {
1036 self.set(index, Value::Cons { car: new_car, cdr })?;
1037 Ok(new_car)
1038 }
1039 _ => Err(ArenaError::InvalidIndex),
1040 }
1041 }
1042
1043 /// Set cdr of a cons cell (mutation operation)
1044 /// Returns the new value on success
1045 #[inline]
1046 pub fn set_cdr(&self, index: ArenaIndex, new_cdr: ArenaIndex) -> ArenaResult<ArenaIndex> {
1047 match self.get(index)? {
1048 Value::Cons { car, .. } => {
1049 self.set(index, Value::Cons { car, cdr: new_cdr })?;
1050 Ok(new_cdr)
1051 }
1052 _ => Err(ArenaError::InvalidIndex),
1053 }
1054 }
1055
1056 // ========================================================================
1057 // Symbol Interning
1058 // ========================================================================
1059
1060 /// Get the intern table root (for GC roots)
1061 ///
1062 /// The intern table is stored in the car of the intern_table_slot cons cell.
1063 pub fn intern_table(&self) -> ArenaIndex {
1064 // intern_table_slot always exists and is slot 3
1065 // Its car contains the actual intern table root
1066 self.intern_table_slot
1067 }
1068
1069 /// Get the current intern table root (the actual alist)
1070 fn get_intern_table_root(&self) -> ArenaResult<ArenaIndex> {
1071 match self.get(self.intern_table_slot)? {
1072 Value::Cons { car, .. } => Ok(car),
1073 _ => unreachable!("intern_table_slot should always be a Cons cell"),
1074 }
1075 }
1076
1077 /// Set the intern table root (update the car of the reference cell)
1078 fn set_intern_table_root(&self, new_root: ArenaIndex) -> ArenaResult<()> {
1079 self.set(self.intern_table_slot, Value::Cons { car: new_root, cdr: self.nil_slot })
1080 }
1081
1082 /// Look up a string in the intern table
1083 /// Returns Some(symbol_index) if found, None otherwise
1084 fn intern_table_lookup(&self, string_idx: ArenaIndex) -> ArenaResult<Option<ArenaIndex>> {
1085 let mut current = self.get_intern_table_root()?;
1086
1087 loop {
1088 match self.get(current)? {
1089 Value::Nil => return Ok(None),
1090 Value::Cons { car, cdr } => {
1091 // car is (string_index . symbol_index)
1092 if let Value::Cons { car: entry_string, cdr: entry_symbol } = self.get(car)? {
1093 if self.string_eq_contiguous(string_idx, entry_string)? {
1094 return Ok(Some(entry_symbol));
1095 }
1096 }
1097 current = cdr;
1098 }
1099 _ => return Err(ArenaError::InvalidIndex),
1100 }
1101 }
1102 }
1103
1104 /// Create or retrieve an interned symbol from a string slice
1105 ///
1106 /// The symbol's `chars` field points to a Value::String with the symbol name.
1107 ///
1108 /// Symbol interning ensures the same symbol name always returns the same index.
1109 ///
1110 /// This provides ~44% memory savings compared to linked list representation.
1111 pub fn symbol(&self, name: &str) -> ArenaResult<ArenaIndex> {
1112 // Create string for the symbol name
1113 let name_str = self.string(name)?;
1114
1115 // Check intern table
1116 if let Some(existing_symbol) = self.intern_table_lookup(name_str)? {
1117 // Free the string we just created since we're using the interned one
1118 self.string_free(name_str)?;
1119 return Ok(existing_symbol);
1120 }
1121
1122 // Not found - create new symbol
1123 let symbol = self.alloc(Value::Symbol { chars: name_str })?;
1124
1125 // Add to intern table: (name_str . symbol)
1126 let binding = self.cons(name_str, symbol)?;
1127 let current_table = self.get_intern_table_root()?;
1128 let new_table = self.cons(binding, current_table)?;
1129
1130 // Update intern table root
1131 self.set_intern_table_root(new_table)?;
1132
1133 Ok(symbol)
1134 }
1135
1136 /// Create or retrieve an interned symbol from bytes (for parsing)
1137 pub fn symbol_from_bytes(&self, bytes: &[u8]) -> ArenaResult<ArenaIndex> {
1138 let char_count = bytes.len();
1139
1140 // Create a Value::String for the symbol name
1141 let name_str = if char_count == 0 {
1142 // Empty string
1143 self.alloc(Value::String {
1144 data: ArenaIndex::NULL,
1145 len: 0
1146 })?
1147 } else {
1148 // Allocate contiguous block for characters
1149 let data = self.arena.alloc_contiguous(char_count, Value::Nil)?;
1150
1151 // Set characters in slots
1152 for (i, &b) in bytes.iter().enumerate() {
1153 let char_idx = self.arena.index_at_offset(data, i)?;
1154 self.arena.set(char_idx, Value::Char(b as char))?;
1155 }
1156
1157 // Create the String value
1158 self.alloc(Value::String { data, len: char_count })?
1159 };
1160
1161 // Check intern table
1162 if let Some(existing_symbol) = self.intern_table_lookup(name_str)? {
1163 // Free the string we just created since we're using the interned one
1164 self.string_free(name_str)?;
1165 return Ok(existing_symbol);
1166 }
1167
1168 // Not found - create new symbol
1169 let symbol = self.alloc(Value::Symbol { chars: name_str })?;
1170
1171 // Add to intern table: (name_str . symbol)
1172 let binding = self.cons(name_str, symbol)?;
1173 let current_table = self.get_intern_table_root()?;
1174 let new_table = self.cons(binding, current_table)?;
1175
1176 // Update intern table root
1177 self.set_intern_table_root(new_table)?;
1178
1179 Ok(symbol)
1180 }
1181
1182 /// Allocate a builtin function
1183 #[inline]
1184 pub fn builtin(&self, b: Builtin) -> ArenaResult<ArenaIndex> {
1185 self.alloc(Value::Builtin(b))
1186 }
1187
1188 /// Allocate a stdlib function
1189 ///
1190 /// StdLib functions are stored in static memory with on-demand parsing.
1191 /// The function body is parsed on first call and cached for reuse.
1192 #[inline]
1193 pub fn stdlib(&self, s: StdLib) -> ArenaResult<ArenaIndex> {
1194 self.alloc(Value::StdLib {
1195 func: s,
1196 cache: ArenaIndex::NULL,
1197 })
1198 }
1199
1200 /// Get cached body and params from a StdLib value.
1201 ///
1202 /// Returns `Some((body, params))` if cached, `None` if not yet parsed.
1203 #[inline]
1204 pub fn stdlib_cache(&self, index: ArenaIndex) -> ArenaResult<Option<(ArenaIndex, ArenaIndex)>> {
1205 let val = self.get(index)?;
1206 match val {
1207 Value::StdLib { cache, .. } => {
1208 if cache.is_null() {
1209 Ok(None)
1210 } else {
1211 // cache points to (body . params)
1212 let body = self.car(cache)?;
1213 let params = self.cdr(cache)?;
1214 Ok(Some((body, params)))
1215 }
1216 }
1217 _ => Err(ArenaError::InvalidIndex),
1218 }
1219 }
1220
1221 /// Set the cached body and params for a StdLib value.
1222 #[inline]
1223 pub fn set_stdlib_cache(&self, index: ArenaIndex, body: ArenaIndex, params: ArenaIndex) -> ArenaResult<()> {
1224 let val = self.get(index)?;
1225 match val {
1226 Value::StdLib { func, .. } => {
1227 // Create cons cell (body . params)
1228 let cache = self.cons(body, params)?;
1229 self.set(index, Value::StdLib { func, cache })
1230 }
1231 _ => Err(ArenaError::InvalidIndex),
1232 }
1233 }
1234
1235 /// Allocate a native function reference.
1236 ///
1237 /// Native functions are Rust functions registered with the evaluator.
1238 /// The `id` is the index in the NativeRegistry, and `name_hash` is
1239 /// a simple hash for verification.
1240 #[inline]
1241 pub fn native(&self, id: usize, name_hash: usize) -> ArenaResult<ArenaIndex> {
1242 self.alloc(Value::Native { id, name_hash })
1243 }
1244
1245 /// Allocate a lambda
1246 ///
1247 /// Stores lambda data as a linked structure in the arena: `(params . (body . env))`
1248 pub fn lambda(&self, params: ArenaIndex, body: ArenaIndex, env: ArenaIndex) -> ArenaResult<ArenaIndex> {
1249 // Build (body . env)
1250 let body_env = self.cons(body, env)?;
1251 // Build (params . (body . env))
1252 let data = self.cons(params, body_env)?;
1253 self.alloc(Value::Lambda { data })
1254 }
1255
1256 /// Extract parts from a lambda: (params, body, env)
1257 ///
1258 /// Lambda data is stored as `(params . (body . env))`.
1259 #[inline]
1260 pub fn lambda_parts(&self, index: ArenaIndex) -> ArenaResult<(ArenaIndex, ArenaIndex, ArenaIndex)> {
1261 let val = self.get(index)?;
1262 match val {
1263 Value::Lambda { data } => {
1264 // data = (params . (body . env))
1265 let params = self.car(data)?;
1266 let body_env = self.cdr(data)?;
1267 let body = self.car(body_env)?;
1268 let env = self.cdr(body_env)?;
1269 Ok((params, body, env))
1270 }
1271 _ => Err(ArenaError::InvalidIndex),
1272 }
1273 }
1274
1275 /// Build a list from an iterator of indices
1276 pub fn list<I: IntoIterator<Item = ArenaIndex>>(&self, items: I) -> ArenaResult<ArenaIndex>
1277 where
1278 I::IntoIter: DoubleEndedIterator,
1279 {
1280 let mut result = self.nil()?;
1281 for item in items.into_iter().rev() {
1282 result = self.cons(item, result)?;
1283 }
1284 Ok(result)
1285 }
1286
1287 /// Get the length of a list
1288 pub fn list_len(&self, mut list: ArenaIndex) -> ArenaResult<usize> {
1289 let mut len = 0;
1290 loop {
1291 match self.get(list)? {
1292 Value::Nil => return Ok(len),
1293 Value::Cons { cdr, .. } => {
1294 len += 1;
1295 list = cdr;
1296 }
1297 _ => return Err(ArenaError::InvalidIndex),
1298 }
1299 }
1300 }
1301
1302 /// Check if two symbols are equal.
1303 ///
1304 /// Symbols are compared by their underlying string content.
1305 #[inline]
1306 pub fn symbol_eq(&self, a: ArenaIndex, b: ArenaIndex) -> ArenaResult<bool> {
1307 // Fast path: same index means same symbol
1308 if a == b {
1309 return Ok(true);
1310 }
1311
1312 let val_a = self.get(a)?;
1313 let val_b = self.get(b)?;
1314
1315 match (val_a, val_b) {
1316 (Value::Symbol { chars: chars_a }, Value::Symbol { chars: chars_b }) => {
1317 self.string_eq_contiguous(chars_a, chars_b)
1318 }
1319 _ => Ok(false),
1320 }
1321 }
1322
1323 /// Check if a symbol matches a string.
1324 #[inline]
1325 pub fn symbol_matches(&self, sym: ArenaIndex, name: &str) -> ArenaResult<bool> {
1326 let val = self.get(sym)?;
1327
1328 match val {
1329 Value::Symbol { chars } => self.string_matches(chars, name),
1330 _ => Ok(false),
1331 }
1332 }
1333
1334 /// Extract symbol name to a fixed buffer.
1335 pub fn symbol_to_bytes(&self, sym: ArenaIndex, buf: &mut [u8]) -> ArenaResult<usize> {
1336 let val = self.get(sym)?;
1337
1338 match val {
1339 Value::Symbol { chars } => self.string_to_bytes(chars, buf),
1340 _ => Ok(0),
1341 }
1342 }
1343
1344 /// Get the length of a symbol's name.
1345 pub fn symbol_len(&self, sym: ArenaIndex) -> ArenaResult<usize> {
1346 match self.get(sym)? {
1347 Value::Symbol { chars } => self.string_len(chars),
1348 _ => Ok(0),
1349 }
1350 }
1351
1352 /// Get a character at a specific index within a symbol's name
1353 /// Returns None if the index is out of bounds or if the value is not a symbol
1354 pub fn symbol_char_at(&self, sym: ArenaIndex, index: usize) -> ArenaResult<Option<char>> {
1355 match self.get(sym)? {
1356 Value::Symbol { chars } => {
1357 let len = self.string_len(chars)?;
1358 if index >= len {
1359 Ok(None)
1360 } else {
1361 Ok(Some(self.string_char_at(chars, index)?))
1362 }
1363 }
1364 _ => Ok(None),
1365 }
1366 }
1367
1368 /// Run garbage collection with intern table as an additional root
1369 ///
1370 /// The intern table reference cell (slot 3) is always included as a GC root
1371 /// to prevent interned symbols from being collected. The intern table is
1372 /// stored as a cons cell whose car points to the alist of interned symbols.
1373 ///
1374 /// # Panics
1375 ///
1376 /// Panics if the number of roots exceeds the internal limit (512 roots).
1377 /// This limit is chosen to balance stack usage in no_std environments
1378 /// with typical program needs. Most Lisp programs use far fewer roots.
1379 pub fn gc(&self, roots: &[ArenaIndex]) -> GcStats {
1380 // Create a new roots array with reserved slots and intern table included
1381 // Using const-sized array to avoid alloc in no_std
1382 // 512 roots should be sufficient for most programs while keeping
1383 // stack usage reasonable (~8KB on 64-bit systems)
1384 const MAX_ROOTS: usize = 512;
1385
1386 // Panic if too many roots - this indicates a programming error
1387 // Account for 4 reserved roots (nil, true, false, intern_table)
1388 assert!(roots.len() < MAX_ROOTS - 4,
1389 "Too many GC roots: {} (max {})", roots.len(), MAX_ROOTS - 4 - 1);
1390
1391 let mut all_roots = [ArenaIndex::NULL; MAX_ROOTS];
1392 let mut root_count = 0;
1393
1394 // Add reserved slots as roots to prevent them from being collected
1395 // These slots (nil, true, false) must always be preserved
1396 all_roots[root_count] = self.nil_slot;
1397 root_count += 1;
1398 all_roots[root_count] = self.true_slot;
1399 root_count += 1;
1400 all_roots[root_count] = self.false_slot;
1401 root_count += 1;
1402
1403 // Add intern table reference cell as root
1404 // This is a cons cell whose car is the intern table alist
1405 // Tracing from this cell will reach all interned symbols
1406 all_roots[root_count] = self.intern_table_slot;
1407 root_count += 1;
1408
1409 // Copy provided roots
1410 for &root in roots {
1411 all_roots[root_count] = root;
1412 root_count += 1;
1413 }
1414
1415 self.arena.collect_garbage(&all_roots[..root_count])
1416 }
1417
1418 /// Allocate with GC on failure
1419 pub fn alloc_or_gc(&self, value: Value, roots: &[ArenaIndex]) -> ArenaResult<ArenaIndex> {
1420 self.arena.alloc_or_gc(value, roots)
1421 }
1422
1423 /// Get arena stats
1424 pub fn stats(&self) -> pwn_arena::ArenaStats {
1425 self.arena.stats()
1426 }
1427
1428 // ========================================================================
1429 // Contiguous String Storage
1430 // ========================================================================
1431 //
1432 // Strings are stored as Value::String { data, len } pointing to
1433 // contiguous sequences of Value::Char in the arena:
1434 // [Char(c1), Char(c2), ..., Char(cn)]
1435 //
1436 // This provides:
1437 // - O(1) length lookup (stored in the Value::String variant)
1438 // - Cache-friendly sequential access
1439 // - Consistent design with arrays (single source of truth for length)
1440 // ========================================================================
1441
1442 /// Allocate a string as contiguous Char values.
1443 ///
1444 /// Returns a Value::String that stores the length, with data pointing to
1445 /// the first character slot. Empty strings have data == NULL and len == 0.
1446 ///
1447 /// # Memory Usage
1448 ///
1449 /// Allocates `s.chars().count()` slots for characters, plus 1 slot for
1450 /// the String value itself.
1451 ///
1452 /// # Errors
1453 ///
1454 /// Returns `ArenaError::OutOfMemory` if:
1455 /// - No contiguous block is available
1456 ///
1457 /// # Example
1458 ///
1459 /// ```rust
1460 /// use grift_parser::Lisp;
1461 /// let lisp = Lisp::<1000>::new();
1462 /// let hello = lisp.string("hello").unwrap();
1463 ///
1464 /// assert_eq!(lisp.string_len(hello).unwrap(), 5);
1465 /// assert_eq!(lisp.string_char_at(hello, 0).unwrap(), 'h');
1466 /// ```
1467 pub fn string(&self, s: &str) -> ArenaResult<ArenaIndex> {
1468 let char_count = s.chars().count();
1469
1470 if char_count == 0 {
1471 // Empty string - no data slots needed
1472 return self.alloc(Value::String {
1473 data: ArenaIndex::NULL,
1474 len: 0
1475 });
1476 }
1477
1478 // Allocate contiguous block for characters
1479 let data = self.arena.alloc_contiguous(char_count, Value::Nil)?;
1480
1481 // Set characters in slots
1482 for (i, c) in s.chars().enumerate() {
1483 let char_idx = self.arena.index_at_offset(data, i)?;
1484 self.arena.set(char_idx, Value::Char(c))?;
1485 }
1486
1487 // Create the String value pointing to the data
1488 self.alloc(Value::String { data, len: char_count })
1489 }
1490
1491 /// Allocate a string from a slice of chars.
1492 ///
1493 /// Returns an ArenaIndex pointing to a Value::String.
1494 pub fn string_from_chars(&self, chars: &[char]) -> ArenaResult<ArenaIndex> {
1495 let char_count = chars.len();
1496
1497 if char_count == 0 {
1498 // Empty string - no data slots needed
1499 return self.alloc(Value::String {
1500 data: ArenaIndex::NULL,
1501 len: 0
1502 });
1503 }
1504
1505 // Allocate contiguous block for characters
1506 let data = self.arena.alloc_contiguous(char_count, Value::Nil)?;
1507
1508 // Set characters in slots
1509 for (i, &c) in chars.iter().enumerate() {
1510 let char_idx = self.arena.index_at_offset(data, i)?;
1511 self.arena.set(char_idx, Value::Char(c))?;
1512 }
1513
1514 // Create the String value pointing to the data
1515 self.alloc(Value::String { data, len: char_count })
1516 }
1517
1518 /// Get the length of a string.
1519 ///
1520 /// Returns O(1) since length is stored in the String value.
1521 ///
1522 /// # Errors
1523 ///
1524 /// Returns `ArenaError::InvalidIndex` if the index doesn't point to
1525 /// a valid string.
1526 pub fn string_len(&self, str_idx: ArenaIndex) -> ArenaResult<usize> {
1527 match self.arena.get(str_idx)? {
1528 Value::String { len, .. } => Ok(len),
1529 _ => Err(ArenaError::InvalidIndex),
1530 }
1531 }
1532
1533 /// Get a character at the given index within a string.
1534 ///
1535 /// Character indices are 0-based. Returns O(1) access.
1536 ///
1537 /// # Errors
1538 ///
1539 /// Returns `ArenaError::InvalidIndex` if:
1540 /// - The string index is invalid
1541 /// - The character index is out of bounds
1542 /// - The slot doesn't contain a Char value
1543 pub fn string_char_at(&self, str_idx: ArenaIndex, char_index: usize) -> ArenaResult<char> {
1544 match self.arena.get(str_idx)? {
1545 Value::String { data, len } => {
1546 if char_index >= len {
1547 return Err(ArenaError::InvalidIndex);
1548 }
1549 let char_slot = self.arena.index_at_offset(data, char_index)?;
1550 match self.arena.get(char_slot)? {
1551 Value::Char(c) => Ok(c),
1552 _ => Err(ArenaError::InvalidIndex),
1553 }
1554 }
1555 _ => Err(ArenaError::InvalidIndex),
1556 }
1557 }
1558
1559 /// Compare two strings for equality.
1560 ///
1561 /// # Errors
1562 ///
1563 /// Returns an error if either string index is invalid.
1564 pub fn string_eq_contiguous(&self, a: ArenaIndex, b: ArenaIndex) -> ArenaResult<bool> {
1565 // Fast path: same index
1566 if a == b {
1567 return Ok(true);
1568 }
1569
1570 let len_a = self.string_len(a)?;
1571 let len_b = self.string_len(b)?;
1572
1573 if len_a != len_b {
1574 return Ok(false);
1575 }
1576
1577 for i in 0..len_a {
1578 let char_a = self.string_char_at(a, i)?;
1579 let char_b = self.string_char_at(b, i)?;
1580 if char_a != char_b {
1581 return Ok(false);
1582 }
1583 }
1584
1585 Ok(true)
1586 }
1587
1588 /// Check if a string matches a Rust string slice.
1589 ///
1590 /// # Errors
1591 ///
1592 /// Returns an error if the string index is invalid.
1593 pub fn string_matches(&self, str_idx: ArenaIndex, s: &str) -> ArenaResult<bool> {
1594 let len = self.string_len(str_idx)?;
1595 let s_len = s.chars().count();
1596
1597 if len != s_len {
1598 return Ok(false);
1599 }
1600
1601 for (i, expected) in s.chars().enumerate() {
1602 let actual = self.string_char_at(str_idx, i)?;
1603 if actual != expected {
1604 return Ok(false);
1605 }
1606 }
1607
1608 Ok(true)
1609 }
1610
1611 /// Copy a string's contents to a byte buffer.
1612 ///
1613 /// Returns the number of bytes written. Only ASCII characters (0-127)
1614 /// are copied; non-ASCII characters are skipped.
1615 ///
1616 /// # Warning
1617 ///
1618 /// This method is designed for ASCII strings. For strings containing
1619 /// non-ASCII Unicode characters, some characters will be skipped and
1620 /// the byte count may not match the character count.
1621 ///
1622 /// # Errors
1623 ///
1624 /// Returns an error if the string index is invalid.
1625 pub fn string_to_bytes(&self, str_idx: ArenaIndex, buf: &mut [u8]) -> ArenaResult<usize> {
1626 let len = self.string_len(str_idx)?;
1627 let mut buf_idx = 0;
1628
1629 for i in 0..len {
1630 if buf_idx >= buf.len() {
1631 break;
1632 }
1633 let c = self.string_char_at(str_idx, i)?;
1634 // Only copy ASCII characters (0-127)
1635 if c.is_ascii() {
1636 buf[buf_idx] = c as u8;
1637 buf_idx += 1;
1638 }
1639 // Non-ASCII characters are skipped
1640 }
1641
1642 Ok(buf_idx)
1643 }
1644
1645 /// Free a string and all its character slots.
1646 ///
1647 /// # Errors
1648 ///
1649 /// Returns an error if the string index is invalid.
1650 pub fn string_free(&self, str_idx: ArenaIndex) -> ArenaResult<()> {
1651 match self.arena.get(str_idx)? {
1652 Value::String { data, len } => {
1653 // Free the data slots
1654 if len > 0 {
1655 self.arena.free_contiguous(data, len)?;
1656 }
1657 // Free the String value itself
1658 self.arena.free(str_idx)
1659 }
1660 _ => Err(ArenaError::InvalidIndex),
1661 }
1662 }
1663
1664 // ========================================================================
1665 // Contiguous Array Storage
1666 // ========================================================================
1667 //
1668 // Arrays are stored as contiguous sequences of Value in the arena.
1669 // Unlike strings which have a length slot, arrays store the length in
1670 // the Value::Array variant itself.
1671 //
1672 // This provides:
1673 // - O(1) indexed access and mutation
1674 // - Cache-friendly sequential access
1675 // - Efficient memory layout
1676 // ========================================================================
1677
1678 /// Create an array with the given length, initialized with a default value.
1679 ///
1680 /// The array stores `len` values contiguously in the arena.
1681 ///
1682 /// # Memory Usage
1683 ///
1684 /// Allocates `len` slots for element storage, plus 1 slot for the Array value itself.
1685 ///
1686 /// # Example
1687 ///
1688 /// ```rust
1689 /// use grift_parser::Lisp;
1690 /// let lisp = Lisp::<1000>::new();
1691 /// let arr = lisp.make_array(3, lisp.nil().unwrap()).unwrap();
1692 ///
1693 /// assert_eq!(lisp.array_len(arr).unwrap(), 3);
1694 /// ```
1695 pub fn make_array(&self, len: usize, default: ArenaIndex) -> ArenaResult<ArenaIndex> {
1696 if len == 0 {
1697 // Empty array - no data slots needed
1698 return self.alloc(Value::Array {
1699 data: ArenaIndex::NULL,
1700 len: 0
1701 });
1702 }
1703
1704 // Allocate contiguous block for elements
1705 let default_val = self.arena.get(default)?;
1706 let data = self.arena.alloc_contiguous(len, default_val)?;
1707
1708 // Create the Array value pointing to the data
1709 self.alloc(Value::Array { data, len })
1710 }
1711
1712 /// Get the length of an array.
1713 ///
1714 /// Returns O(1) since length is stored in the Array value.
1715 ///
1716 /// # Errors
1717 ///
1718 /// Returns `ArenaError::InvalidIndex` if the index doesn't point to an array.
1719 pub fn array_len(&self, arr_idx: ArenaIndex) -> ArenaResult<usize> {
1720 match self.arena.get(arr_idx)? {
1721 Value::Array { len, .. } => Ok(len),
1722 _ => Err(ArenaError::InvalidIndex),
1723 }
1724 }
1725
1726 /// Get the element at the given index within an array.
1727 ///
1728 /// Returns O(1) access via direct index calculation.
1729 ///
1730 /// # Errors
1731 ///
1732 /// Returns `ArenaError::InvalidIndex` if:
1733 /// - The array index is invalid
1734 /// - The element index is out of bounds
1735 pub fn array_get(&self, arr_idx: ArenaIndex, index: usize) -> ArenaResult<ArenaIndex> {
1736 match self.arena.get(arr_idx)? {
1737 Value::Array { data, len } => {
1738 if index >= len {
1739 return Err(ArenaError::InvalidIndex);
1740 }
1741 self.arena.index_at_offset(data, index)
1742 }
1743 _ => Err(ArenaError::InvalidIndex),
1744 }
1745 }
1746
1747 /// Set the element at the given index within an array.
1748 ///
1749 /// Returns O(1) mutation via direct index calculation.
1750 ///
1751 /// # Errors
1752 ///
1753 /// Returns `ArenaError::InvalidIndex` if:
1754 /// - The array index is invalid
1755 /// - The element index is out of bounds
1756 pub fn array_set(&self, arr_idx: ArenaIndex, index: usize, value: ArenaIndex) -> ArenaResult<()> {
1757 match self.arena.get(arr_idx)? {
1758 Value::Array { data, len } => {
1759 if index >= len {
1760 return Err(ArenaError::InvalidIndex);
1761 }
1762 let elem_slot = self.arena.index_at_offset(data, index)?;
1763 let val = self.arena.get(value)?;
1764 self.arena.set(elem_slot, val)
1765 }
1766 _ => Err(ArenaError::InvalidIndex),
1767 }
1768 }
1769
1770 /// Free an array and all its element slots.
1771 ///
1772 /// # Errors
1773 ///
1774 /// Returns an error if the array index is invalid.
1775 pub fn array_free(&self, arr_idx: ArenaIndex) -> ArenaResult<()> {
1776 match self.arena.get(arr_idx)? {
1777 Value::Array { data, len } => {
1778 // Free the data slots
1779 if len > 0 {
1780 self.arena.free_contiguous(data, len)?;
1781 }
1782 // Free the Array value itself
1783 self.arena.free(arr_idx)
1784 }
1785 _ => Err(ArenaError::InvalidIndex),
1786 }
1787 }
1788}
1789
1790impl<const N: usize> Default for Lisp<N> {
1791 fn default() -> Self {
1792 Self::new()
1793 }
1794}
1795
1796// ============================================================================
1797// Parser
1798// ============================================================================
1799
1800/// Source location for error reporting
1801#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
1802pub struct SourceLoc {
1803 pub line: usize,
1804 pub column: usize,
1805}
1806
1807/// Parser error with location
1808#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1809pub struct ParseError {
1810 pub kind: ParseErrorKind,
1811 pub loc: SourceLoc,
1812}
1813
1814#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1815pub enum ParseErrorKind {
1816 /// Unexpected end of input
1817 UnexpectedEof,
1818 /// Unexpected character
1819 UnexpectedChar(char),
1820 /// Unmatched parenthesis
1821 UnmatchedParen,
1822 /// Number too large
1823 NumberOverflow,
1824 /// Arena is full
1825 OutOfMemory,
1826 /// Invalid hash literal
1827 InvalidHashLiteral,
1828 /// Invalid character literal
1829 InvalidCharLiteral,
1830 /// Invalid string escape sequence
1831 InvalidEscapeSequence,
1832 /// Unterminated string literal
1833 UnterminatedString,
1834 /// Vector literal exceeds maximum size (256 elements in no_std)
1835 VectorLiteralTooLarge,
1836}
1837
1838impl ParseError {
1839 pub fn new(kind: ParseErrorKind, line: usize, column: usize) -> Self {
1840 ParseError { kind, loc: SourceLoc { line, column } }
1841 }
1842}
1843
1844impl From<ArenaError> for ParseError {
1845 fn from(e: ArenaError) -> Self {
1846 ParseError {
1847 kind: match e {
1848 ArenaError::OutOfMemory => ParseErrorKind::OutOfMemory,
1849 _ => ParseErrorKind::OutOfMemory,
1850 },
1851 loc: SourceLoc::default(),
1852 }
1853 }
1854}
1855
1856/// Parser state
1857pub struct Parser<'a> {
1858 input: &'a [u8],
1859 pos: usize,
1860 line: usize,
1861 column: usize,
1862}
1863
1864impl<'a> Parser<'a> {
1865 /// Create a new parser
1866 pub fn new(input: &'a str) -> Self {
1867 Parser {
1868 input: input.as_bytes(),
1869 pos: 0,
1870 line: 1,
1871 column: 1,
1872 }
1873 }
1874
1875 /// Create from bytes
1876 pub fn from_bytes(input: &'a [u8]) -> Self {
1877 Parser { input, pos: 0, line: 1, column: 1 }
1878 }
1879
1880 /// Get current source location
1881 fn loc(&self) -> SourceLoc {
1882 SourceLoc { line: self.line, column: self.column }
1883 }
1884
1885 /// Create an error at current location
1886 fn error(&self, kind: ParseErrorKind) -> ParseError {
1887 ParseError { kind, loc: self.loc() }
1888 }
1889
1890 /// Peek at the current character
1891 fn peek(&self) -> Option<u8> {
1892 self.input.get(self.pos).copied()
1893 }
1894
1895 /// Peek at the next character (lookahead)
1896 fn peek_next(&self) -> Option<u8> {
1897 self.input.get(self.pos + 1).copied()
1898 }
1899
1900 /// Advance and return the current character
1901 fn advance(&mut self) -> Option<u8> {
1902 let c = self.peek()?;
1903 self.pos += 1;
1904 if c == b'\n' {
1905 self.line += 1;
1906 self.column = 1;
1907 } else {
1908 self.column += 1;
1909 }
1910 Some(c)
1911 }
1912
1913 /// Skip whitespace and comments
1914 fn skip_whitespace(&mut self) {
1915 while let Some(c) = self.peek() {
1916 if c.is_ascii_whitespace() {
1917 self.advance();
1918 } else if c == b';' {
1919 // Comment - skip to end of line
1920 while let Some(c) = self.advance() {
1921 if c == b'\n' {
1922 break;
1923 }
1924 }
1925 } else {
1926 break;
1927 }
1928 }
1929 }
1930
1931 /// Check if a character can be part of a symbol
1932 fn is_symbol_char(c: u8) -> bool {
1933 matches!(c,
1934 b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' |
1935 b'+' | b'-' | b'*' | b'/' | b'<' | b'>' | b'=' |
1936 b'?' | b'!' | b'_' | b'&' | b'%' | b'^' | b'~' | b'.'
1937 )
1938 }
1939
1940 /// Parse a single expression
1941 pub fn parse<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
1942 self.skip_whitespace();
1943
1944 match self.peek() {
1945 None => Err(self.error(ParseErrorKind::UnexpectedEof)),
1946
1947 Some(b'(') => self.parse_list(lisp),
1948
1949 Some(b')') => Err(self.error(ParseErrorKind::UnmatchedParen)),
1950
1951 Some(b'"') => self.parse_string(lisp),
1952
1953 Some(b'\'') => {
1954 // Quote: 'x -> (quote x)
1955 self.advance();
1956 let expr = self.parse(lisp)?;
1957 let quote_sym = lisp.symbol("quote")?;
1958 let nil = lisp.nil()?;
1959 let quoted = lisp.cons(expr, nil)?;
1960 lisp.cons(quote_sym, quoted).map_err(Into::into)
1961 }
1962
1963 Some(b'#') => self.parse_hash_literal(lisp),
1964
1965 Some(c) if c.is_ascii_digit() => self.parse_number(lisp),
1966
1967 Some(b'-') => {
1968 // Could be negative number or symbol
1969 if self.peek_next().map_or(false, |c| c.is_ascii_digit()) {
1970 self.parse_number(lisp)
1971 } else {
1972 self.parse_symbol(lisp)
1973 }
1974 }
1975
1976 Some(c) if Self::is_symbol_char(c) => self.parse_symbol(lisp),
1977
1978 Some(c) => Err(self.error(ParseErrorKind::UnexpectedChar(c as char))),
1979 }
1980 }
1981
1982 /// Parse hash literals (#t, #f, #\char, #(vector), etc.)
1983 fn parse_hash_literal<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
1984 self.advance(); // consume '#'
1985
1986 match self.peek() {
1987 Some(b't') | Some(b'T') => {
1988 self.advance();
1989 lisp.true_val().map_err(Into::into)
1990 }
1991 Some(b'f') | Some(b'F') => {
1992 self.advance();
1993 lisp.false_val().map_err(Into::into)
1994 }
1995 Some(b'\\') => self.parse_char_literal(lisp),
1996 Some(b'(') => self.parse_vector_literal(lisp),
1997 Some(_) => Err(self.error(ParseErrorKind::InvalidHashLiteral)),
1998 None => Err(self.error(ParseErrorKind::UnexpectedEof)),
1999 }
2000 }
2001
2002 /// Parse vector literal #(obj ...)
2003 ///
2004 /// Note: In no_std environments, vector literals are limited to 256 elements
2005 /// due to stack allocation constraints. Use `make-vector` or `vector` for
2006 /// larger vectors.
2007 fn parse_vector_literal<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
2008 self.advance(); // consume '('
2009
2010 // Parse elements into a stack-allocated array (no_std constraint)
2011 // Maximum 256 elements for literals; use make-vector for larger vectors
2012 let mut elements: [ArenaIndex; 256] = [ArenaIndex::NULL; 256];
2013 let mut count = 0usize;
2014
2015 self.skip_whitespace();
2016 while let Some(c) = self.peek() {
2017 if c == b')' {
2018 break;
2019 }
2020
2021 if count >= 256 {
2022 return Err(self.error(ParseErrorKind::VectorLiteralTooLarge));
2023 }
2024
2025 let elem = self.parse(lisp)?;
2026 elements[count] = elem;
2027 count += 1;
2028
2029 self.skip_whitespace();
2030 }
2031
2032 // Consume closing paren
2033 match self.advance() {
2034 Some(b')') => {}
2035 _ => return Err(self.error(ParseErrorKind::UnmatchedParen)),
2036 }
2037
2038 // Create the vector
2039 let placeholder = lisp.number(0)?;
2040 let vec = lisp.make_array(count, placeholder)?;
2041
2042 // Fill in elements
2043 for i in 0..count {
2044 lisp.array_set(vec, i, elements[i])?;
2045 }
2046
2047 Ok(vec)
2048 }
2049
2050 /// Parse character literal (#\a, #\space, #\newline, etc.)
2051 fn parse_char_literal<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
2052 self.advance(); // consume '\'
2053
2054 // First check for named characters
2055 // We need to look ahead to see if there's a multi-char name
2056 let start = self.pos;
2057
2058 // Read characters that could form a name
2059 while let Some(c) = self.peek() {
2060 if Self::is_symbol_char(c) {
2061 self.advance();
2062 } else {
2063 break;
2064 }
2065 }
2066
2067 let name_len = self.pos - start;
2068
2069 if name_len == 0 {
2070 // #\ followed by non-symbol character like space: #\
2071 return match self.peek() {
2072 Some(c) => {
2073 self.advance();
2074 lisp.char(c as char).map_err(Into::into)
2075 }
2076 None => Err(self.error(ParseErrorKind::UnexpectedEof)),
2077 };
2078 }
2079
2080 // Get the name as a slice
2081 let name = &self.input[start..self.pos];
2082
2083 // If it's a single character, return it directly
2084 if name_len == 1 {
2085 return lisp.char(name[0] as char).map_err(Into::into);
2086 }
2087
2088 // Check for named characters (R7RS Section 6.6)
2089 match name {
2090 b"alarm" => lisp.char('\x07').map_err(Into::into),
2091 b"backspace" => lisp.char('\x08').map_err(Into::into),
2092 b"delete" => lisp.char('\x7F').map_err(Into::into),
2093 b"escape" => lisp.char('\x1B').map_err(Into::into),
2094 b"newline" => lisp.char('\n').map_err(Into::into),
2095 b"null" => lisp.char('\0').map_err(Into::into),
2096 b"return" => lisp.char('\r').map_err(Into::into),
2097 b"space" => lisp.char(' ').map_err(Into::into),
2098 b"tab" => lisp.char('\t').map_err(Into::into),
2099 _ => {
2100 // Check for hex character #\xNN...
2101 if name.len() >= 2 && (name[0] == b'x' || name[0] == b'X') {
2102 let hex_str = &name[1..];
2103 if let Some(code) = Self::parse_hex(hex_str) {
2104 if let Some(c) = char::from_u32(code) {
2105 return lisp.char(c).map_err(Into::into);
2106 }
2107 }
2108 }
2109 Err(self.error(ParseErrorKind::InvalidCharLiteral))
2110 }
2111 }
2112 }
2113
2114 /// Parse hex digits into a u32 value
2115 fn parse_hex(bytes: &[u8]) -> Option<u32> {
2116 if bytes.is_empty() {
2117 return None;
2118 }
2119 let mut result: u32 = 0;
2120 for &b in bytes {
2121 let digit = match b {
2122 b'0'..=b'9' => (b - b'0') as u32,
2123 b'a'..=b'f' => (b - b'a' + 10) as u32,
2124 b'A'..=b'F' => (b - b'A' + 10) as u32,
2125 _ => return None,
2126 };
2127 result = result.checked_mul(16)?.checked_add(digit)?;
2128 }
2129 Some(result)
2130 }
2131
2132 /// Parse a string literal ("..." with escape sequences)
2133 fn parse_string<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
2134 self.advance(); // consume opening '"'
2135
2136 // Collect characters into a fixed-size buffer
2137 const MAX_STRING_LEN: usize = 1024;
2138 let mut chars: [char; MAX_STRING_LEN] = ['\0'; MAX_STRING_LEN];
2139 let mut len = 0;
2140
2141 loop {
2142 match self.peek() {
2143 None => return Err(self.error(ParseErrorKind::UnterminatedString)),
2144 Some(b'"') => {
2145 self.advance(); // consume closing '"'
2146 break;
2147 }
2148 Some(b'\\') => {
2149 // Escape sequence
2150 self.advance(); // consume '\'
2151 let c = match self.peek() {
2152 None => return Err(self.error(ParseErrorKind::UnterminatedString)),
2153 Some(b'a') => { self.advance(); '\x07' } // alarm
2154 Some(b'b') => { self.advance(); '\x08' } // backspace
2155 Some(b't') => { self.advance(); '\t' } // tab
2156 Some(b'n') => { self.advance(); '\n' } // newline
2157 Some(b'r') => { self.advance(); '\r' } // return
2158 Some(b'"') => { self.advance(); '"' } // double quote
2159 Some(b'\\') => { self.advance(); '\\' } // backslash
2160 Some(b'|') => { self.advance(); '|' } // vertical line
2161 Some(b'x') => {
2162 // Hex escape: \xNN;
2163 self.advance(); // consume 'x'
2164 let hex_start = self.pos;
2165 while let Some(c) = self.peek() {
2166 if c == b';' {
2167 break;
2168 }
2169 if c.is_ascii_hexdigit() {
2170 self.advance();
2171 } else {
2172 return Err(self.error(ParseErrorKind::InvalidEscapeSequence));
2173 }
2174 }
2175 let hex_bytes = &self.input[hex_start..self.pos];
2176 if self.peek() != Some(b';') {
2177 return Err(self.error(ParseErrorKind::InvalidEscapeSequence));
2178 }
2179 self.advance(); // consume ';'
2180 match Self::parse_hex(hex_bytes) {
2181 Some(code) => {
2182 match char::from_u32(code) {
2183 Some(ch) => ch,
2184 None => return Err(self.error(ParseErrorKind::InvalidEscapeSequence)),
2185 }
2186 }
2187 None => return Err(self.error(ParseErrorKind::InvalidEscapeSequence)),
2188 }
2189 }
2190 Some(b'\n') | Some(b'\r') => {
2191 // Line continuation: skip the line ending and any intraline whitespace on next line
2192 // Per R7RS, skip only the first line ending, then intraline whitespace
2193 self.advance(); // consume the \n or \r
2194 // Handle \r\n as a single line ending
2195 if self.peek() == Some(b'\n') {
2196 self.advance();
2197 }
2198 // Skip intraline whitespace on next line (spaces and tabs only, not newlines)
2199 while let Some(c) = self.peek() {
2200 if c == b' ' || c == b'\t' {
2201 self.advance();
2202 } else {
2203 break;
2204 }
2205 }
2206 continue; // Don't add any character
2207 }
2208 Some(c) if c == b' ' || c == b'\t' => {
2209 // \<intraline whitespace>*<line ending> - skip whitespace until line ending
2210 while let Some(c) = self.peek() {
2211 if c == b' ' || c == b'\t' {
2212 self.advance();
2213 } else if c == b'\n' || c == b'\r' {
2214 self.advance();
2215 // Handle \r\n as a single line ending
2216 if c == b'\r' && self.peek() == Some(b'\n') {
2217 self.advance();
2218 }
2219 // Skip trailing whitespace on next line (intraline only)
2220 while let Some(c) = self.peek() {
2221 if c == b' ' || c == b'\t' {
2222 self.advance();
2223 } else {
2224 break;
2225 }
2226 }
2227 break;
2228 } else {
2229 return Err(self.error(ParseErrorKind::InvalidEscapeSequence));
2230 }
2231 }
2232 continue; // Don't add any character
2233 }
2234 Some(_) => return Err(self.error(ParseErrorKind::InvalidEscapeSequence)),
2235 };
2236 if len >= MAX_STRING_LEN {
2237 return Err(self.error(ParseErrorKind::OutOfMemory));
2238 }
2239 chars[len] = c;
2240 len += 1;
2241 }
2242 Some(c) => {
2243 if len >= MAX_STRING_LEN {
2244 return Err(self.error(ParseErrorKind::OutOfMemory));
2245 }
2246 chars[len] = c as char;
2247 len += 1;
2248 self.advance();
2249 }
2250 }
2251 }
2252
2253 // Allocate the string in the arena
2254 lisp.string_from_chars(&chars[..len]).map_err(Into::into)
2255 }
2256
2257 /// Parse a list (including nil)
2258 fn parse_list<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
2259 self.advance(); // consume '('
2260 self.skip_whitespace();
2261
2262 if self.peek() == Some(b')') {
2263 self.advance();
2264 return lisp.nil().map_err(Into::into);
2265 }
2266
2267 // Parse elements and build list
2268 const MAX_LIST_DEPTH: usize = 128;
2269 let mut elements: [ArenaIndex; MAX_LIST_DEPTH] = [ArenaIndex::NULL; MAX_LIST_DEPTH];
2270 let mut count = 0;
2271
2272 loop {
2273 self.skip_whitespace();
2274
2275 match self.peek() {
2276 None => return Err(self.error(ParseErrorKind::UnexpectedEof)),
2277 Some(b')') => {
2278 self.advance();
2279 break;
2280 }
2281 Some(b'.') => {
2282 // Dotted pair: (a . b)
2283 self.advance();
2284 self.skip_whitespace();
2285
2286 if count == 0 {
2287 return Err(self.error(ParseErrorKind::UnexpectedChar('.')));
2288 }
2289
2290 let cdr = self.parse(lisp)?;
2291 self.skip_whitespace();
2292
2293 if self.advance() != Some(b')') {
2294 return Err(self.error(ParseErrorKind::UnmatchedParen));
2295 }
2296
2297 // Build the dotted list
2298 let mut result = cdr;
2299 for i in (0..count).rev() {
2300 result = lisp.cons(elements[i], result)?;
2301 }
2302 return Ok(result);
2303 }
2304 Some(_) => {
2305 if count >= MAX_LIST_DEPTH {
2306 return Err(self.error(ParseErrorKind::OutOfMemory));
2307 }
2308 elements[count] = self.parse(lisp)?;
2309 count += 1;
2310 }
2311 }
2312 }
2313
2314 // Build proper list
2315 let mut result = lisp.nil()?;
2316 for i in (0..count).rev() {
2317 result = lisp.cons(elements[i], result)?;
2318 }
2319 Ok(result)
2320 }
2321
2322 /// Parse a number (integer or float)
2323 fn parse_number<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
2324 let negative = if self.peek() == Some(b'-') {
2325 self.advance();
2326 true
2327 } else {
2328 false
2329 };
2330
2331 // Parse integer part
2332 let mut int_value: isize = 0;
2333
2334 while let Some(c) = self.peek() {
2335 if c.is_ascii_digit() {
2336 self.advance();
2337 int_value = int_value.checked_mul(10)
2338 .and_then(|v| v.checked_add((c - b'0') as isize))
2339 .ok_or_else(|| self.error(ParseErrorKind::NumberOverflow))?;
2340 } else {
2341 break;
2342 }
2343 }
2344
2345 // Check for decimal point or exponent
2346 let is_float = matches!(self.peek(), Some(b'.') | Some(b'e') | Some(b'E'));
2347
2348 if is_float {
2349 // Parse as floating point
2350 let mut float_value = int_value as f64;
2351
2352 // Parse fractional part
2353 if self.peek() == Some(b'.') {
2354 self.advance();
2355 let mut frac_mult = 0.1;
2356 while let Some(c) = self.peek() {
2357 if c.is_ascii_digit() {
2358 self.advance();
2359 float_value += (c - b'0') as f64 * frac_mult;
2360 frac_mult *= 0.1;
2361 } else {
2362 break;
2363 }
2364 }
2365 }
2366
2367 // Parse exponent
2368 if matches!(self.peek(), Some(b'e') | Some(b'E')) {
2369 self.advance();
2370 let exp_negative = match self.peek() {
2371 Some(b'-') => {
2372 self.advance();
2373 true
2374 }
2375 Some(b'+') => {
2376 self.advance();
2377 false
2378 }
2379 _ => false,
2380 };
2381
2382 let mut exp_value: i32 = 0;
2383 while let Some(c) = self.peek() {
2384 if c.is_ascii_digit() {
2385 self.advance();
2386 exp_value = exp_value.saturating_mul(10).saturating_add((c - b'0') as i32);
2387 } else {
2388 break;
2389 }
2390 }
2391
2392 if exp_negative {
2393 exp_value = -exp_value;
2394 }
2395
2396 // Manual power of 10 calculation (no libm)
2397 float_value = Self::multiply_by_pow10(float_value, exp_value);
2398 }
2399
2400 if negative {
2401 float_value = -float_value;
2402 }
2403
2404 lisp.float(float_value).map_err(Into::into)
2405 } else {
2406 // Return as integer
2407 if negative {
2408 int_value = -int_value;
2409 }
2410 lisp.number(int_value).map_err(Into::into)
2411 }
2412 }
2413
2414 /// Multiply a float by 10^exp without using libm
2415 fn multiply_by_pow10(value: f64, exp: i32) -> f64 {
2416 if exp == 0 {
2417 return value;
2418 }
2419
2420 let mut result = value;
2421 let mut e = exp.abs();
2422 let mut multiplier = if exp > 0 { 10.0 } else { 0.1 };
2423
2424 while e > 0 {
2425 if e & 1 == 1 {
2426 result *= multiplier;
2427 }
2428 multiplier *= multiplier;
2429 e >>= 1;
2430 }
2431
2432 result
2433 }
2434
2435 /// Parse a symbol
2436 fn parse_symbol<const N: usize>(&mut self, lisp: &Lisp<N>) -> Result<ArenaIndex, ParseError> {
2437 const MAX_SYMBOL_LEN: usize = 64;
2438 let mut buffer: [u8; MAX_SYMBOL_LEN] = [0; MAX_SYMBOL_LEN];
2439 let mut len = 0;
2440
2441 while let Some(c) = self.peek() {
2442 if Self::is_symbol_char(c) && len < MAX_SYMBOL_LEN {
2443 buffer[len] = c.to_ascii_lowercase();
2444 len += 1;
2445 self.advance();
2446 } else {
2447 break;
2448 }
2449 }
2450
2451 let name = &buffer[..len];
2452
2453 // Check for special float literals (R7RS)
2454 // +nan.0, -nan.0, +inf.0, -inf.0
2455 if len == 6 {
2456 if name == b"+nan.0" || name == b"-nan.0" {
2457 return lisp.float(f64::NAN).map_err(Into::into);
2458 }
2459 if name == b"+inf.0" {
2460 return lisp.float(f64::INFINITY).map_err(Into::into);
2461 }
2462 if name == b"-inf.0" {
2463 return lisp.float(f64::NEG_INFINITY).map_err(Into::into);
2464 }
2465 }
2466
2467 // Note: In Scheme, nil is just a regular symbol.
2468 // The empty list is written as () or '() only.
2469 // No special handling for 'nil' - it's parsed as a regular symbol.
2470
2471 lisp.symbol_from_bytes(name).map_err(Into::into)
2472 }
2473
2474 /// Check if there's more input (after whitespace)
2475 pub fn has_more(&mut self) -> bool {
2476 self.skip_whitespace();
2477 self.peek().is_some()
2478 }
2479
2480 /// Get current position for error reporting
2481 pub fn position(&self) -> (usize, usize) {
2482 (self.line, self.column)
2483 }
2484}
2485
2486/// Parse a string into a Lisp expression
2487pub fn parse<const N: usize>(lisp: &Lisp<N>, input: &str) -> Result<ArenaIndex, ParseError> {
2488 let mut parser = Parser::new(input);
2489 parser.parse(lisp)
2490}
2491
2492/// Parse multiple expressions
2493pub fn parse_all<const N: usize>(lisp: &Lisp<N>, input: &str) -> Result<ArenaIndex, ParseError> {
2494 let mut parser = Parser::new(input);
2495 let mut results = [ArenaIndex::NULL; 64];
2496 let mut count = 0;
2497
2498 while parser.has_more() && count < 64 {
2499 results[count] = parser.parse(lisp)?;
2500 count += 1;
2501 }
2502
2503 // Build list of results
2504 let mut result = lisp.nil()?;
2505 for i in (0..count).rev() {
2506 result = lisp.cons(results[i], result)?;
2507 }
2508 Ok(result)
2509}
2510