engawa-lisp 0.1.3

Tatara-lisp authoring layer for engawa render graphs. Operators write (defmaterial …) / (defgraph …) / (defeffect …) in a .tlisp file; this crate parses + lowers to engawa::RenderGraph. Pairs with shikumi's notify watcher for hot-reload.
Documentation
//! Minimal s-expression tokenizer + parser.
//!
//! Supports:
//! * `()` lists, arbitrary nesting
//! * symbols (`[a-zA-Z_-][a-zA-Z0-9_-]*`)
//! * `"double-quoted strings"` with `\"`, `\\`, `\n`, `\t`, `\r` escapes
//! * numbers (`[+-]?[0-9]+(\.[0-9]+)?`)
//! * `; line comment` to end of line
//! * Whitespace separation
//!
//! Does NOT support: quasiquote, backtick, splice, character
//! literals, multi-line strings. Add as the lisp surface grows.

use thiserror::Error;

use crate::sexpr::{Sexpr, SexprKind};

/// Source-position span of a parsed form.
///
/// `start` and `end` are byte offsets into the original source
/// string. `start` is inclusive and `end` is exclusive: the parser
/// records `end` after consuming the form's last byte, so
/// `source[start..end]` is exactly the form's text. `line` and
/// `column` locate the form's first byte; both are 1-based and
/// columns are counted in bytes. Useful for operator error
/// messages.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset of the form's first byte (inclusive).
    pub start: usize,
    /// Byte offset one past the form's last byte (exclusive).
    pub end: usize,
    /// 1-based line on which the form starts.
    pub line: usize,
    /// 1-based byte column at which the form starts.
    pub column: usize,
}

/// Errors produced while tokenizing / parsing s-expression source.
///
/// Every variant carries the 1-based `line` and `column` the parser
/// associated with the problem.
#[derive(Debug, Error, Clone, PartialEq)]
pub enum ParseError {
    /// A byte that cannot start any form was found where a form was
    /// expected. Also reported with `ch == '\0'` when a form is
    /// expected but the input has already ended.
    #[error("unexpected character {ch:?} at line {line}, column {column}")]
    UnexpectedChar {
        ch: char,
        line: usize,
        column: usize,
    },
    /// Input ended inside a string literal. The position is that of
    /// the opening `"`.
    #[error("unterminated string starting at line {line}, column {column}")]
    UnterminatedString { line: usize, column: usize },
    /// Input ended before the `)` matching an open list. The
    /// position is that of the opening `(`.
    #[error("unterminated list (missing ')') opened at line {line}, column {column}")]
    UnterminatedList { line: usize, column: usize },
    /// A `)` appeared where a form was expected, outside any open
    /// list. The position is that of the `)`.
    #[error("unexpected ')' at line {line}, column {column}")]
    UnexpectedRParen { line: usize, column: usize },
    /// A backslash inside a string literal was followed by a
    /// character that is not a recognised escape. The position is
    /// that of the character after the backslash.
    #[error("invalid escape sequence \\{ch} at line {line}, column {column}")]
    InvalidEscape {
        ch: char,
        line: usize,
        column: usize,
    },
}

/// Parse `source` into its top-level s-expressions, in source order.
///
/// Whitespace and `;` line comments between forms are skipped. An
/// input containing only whitespace and comments yields an empty
/// vector.
///
/// # Errors
///
/// Returns the first [`ParseError`] hit while reading a form; forms
/// parsed before the error are discarded.
pub fn parse(source: &str) -> Result<Vec<Sexpr>, ParseError> {
    let mut parser = Parser::new(source);
    let mut forms = Vec::new();

    // Trim leading trivia once, then after every form, so the loop
    // condition only ever sees "start of a form" or "end of input".
    parser.skip_whitespace_and_comments();
    while !parser.eof() {
        forms.push(parser.parse_one()?);
        parser.skip_whitespace_and_comments();
    }

    Ok(forms)
}

/// Cursor over the raw bytes of the source text, plus the line and
/// column bookkeeping used for spans and error positions.
struct Parser<'a> {
    /// The input, viewed as bytes (obtained from a `&str`).
    source: &'a [u8],
    /// Byte offset of the next unread byte.
    pos: usize,
    /// 1-based line of the byte at `pos`.
    line: usize,
    /// 1-based column of the byte at `pos`; advances once per byte
    /// and resets to 1 after a `\n`.
    col: usize,
}

impl<'a> Parser<'a> {
    /// Create a parser positioned on the first byte of `source`
    /// (line 1, column 1).
    fn new(source: &'a str) -> Self {
        Self {
            source: source.as_bytes(),
            pos: 0,
            line: 1,
            col: 1,
        }
    }

    /// `true` once every input byte has been consumed.
    fn eof(&self) -> bool {
        self.pos >= self.source.len()
    }

    /// Return the byte under the cursor without consuming it, or
    /// `None` at end of input.
    fn peek(&self) -> Option<u8> {
        self.source.get(self.pos).copied()
    }

    /// Consume and return the byte under the cursor, keeping the
    /// line/column counters in sync. Columns advance once per byte.
    fn advance(&mut self) -> Option<u8> {
        let b = self.peek()?;
        self.pos += 1;
        if b == b'\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        Some(b)
    }

    /// Decode the complete UTF-8 character that starts at the cursor.
    ///
    /// Returns `None` at end of input or when the cursor is not on a
    /// character boundary. Only used on error paths, so that error
    /// values carry the real character instead of its first byte
    /// reinterpreted as Latin-1.
    fn current_char(&self) -> Option<char> {
        let rest = self.source.get(self.pos..)?;
        // A UTF-8 scalar is at most four bytes long; the shortest
        // prefix that validates is exactly one character.
        let window = &rest[..rest.len().min(4)];
        (1..=window.len())
            .find_map(|len| std::str::from_utf8(&window[..len]).ok())
            .and_then(|s| s.chars().next())
    }

    /// Consume a (possibly empty) run of ASCII digits.
    fn consume_digits(&mut self) {
        while matches!(self.peek(), Some(b) if b.is_ascii_digit()) {
            self.advance();
        }
    }

    /// Skip spaces, tabs, CR, LF and `;` line comments. A comment
    /// runs up to (not including) the next `\n`, which is then eaten
    /// as ordinary whitespace.
    fn skip_whitespace_and_comments(&mut self) {
        while let Some(b) = self.peek() {
            match b {
                b' ' | b'\t' | b'\r' | b'\n' => {
                    self.advance();
                }
                b';' => {
                    while let Some(b) = self.peek() {
                        if b == b'\n' {
                            break;
                        }
                        self.advance();
                    }
                }
                _ => break,
            }
        }
    }

    /// Parse exactly one form (list, string, number or symbol)
    /// starting at the next non-trivia byte.
    ///
    /// # Errors
    ///
    /// * [`ParseError::UnexpectedChar`] with `ch == '\0'` if the
    ///   input ends before a form starts, or with the offending
    ///   character if it cannot start any form.
    /// * [`ParseError::UnexpectedRParen`] on a stray `)`.
    /// * Whatever the list / string sub-parsers report.
    fn parse_one(&mut self) -> Result<Sexpr, ParseError> {
        self.skip_whitespace_and_comments();
        let start = self.pos;
        let line = self.line;
        let col = self.col;
        let Some(b) = self.peek() else {
            return Err(ParseError::UnexpectedChar {
                ch: '\0',
                line,
                column: col,
            });
        };
        match b {
            b'(' => self.parse_list(),
            b')' => Err(ParseError::UnexpectedRParen { line, column: col }),
            b'"' => self.parse_string(),
            b'-' | b'+' => {
                // A sign starts a number only when a digit follows;
                // otherwise it is (the start of) a symbol such as
                // `+`, `-` or `-foo`.
                let digit_follows =
                    matches!(self.source.get(self.pos + 1), Some(c) if c.is_ascii_digit());
                if digit_follows {
                    self.parse_number(start, line, col)
                } else {
                    self.parse_symbol(start, line, col)
                }
            }
            b if b.is_ascii_digit() => self.parse_number(start, line, col),
            b if b.is_ascii_alphabetic() || b == b'_' => {
                self.parse_symbol(start, line, col)
            }
            _ => Err(ParseError::UnexpectedChar {
                // Report the whole character, not just its lead byte.
                ch: self.current_char().unwrap_or(b as char),
                line,
                column: col,
            }),
        }
    }

    /// Parse a parenthesised list. The cursor must be on the `(`.
    ///
    /// # Errors
    ///
    /// [`ParseError::UnterminatedList`] (positioned at the `(`) if
    /// the input ends before the matching `)`; otherwise any error
    /// from parsing an element.
    fn parse_list(&mut self) -> Result<Sexpr, ParseError> {
        let start = self.pos;
        let line = self.line;
        let col = self.col;
        self.advance(); // consume '('
        let mut items = Vec::new();
        loop {
            self.skip_whitespace_and_comments();
            match self.peek() {
                None => {
                    return Err(ParseError::UnterminatedList { line, column: col });
                }
                Some(b')') => {
                    self.advance();
                    let end = self.pos;
                    return Ok(Sexpr {
                        kind: SexprKind::List(items),
                        span: Span {
                            start,
                            end,
                            line,
                            column: col,
                        },
                    });
                }
                _ => {
                    items.push(self.parse_one()?);
                }
            }
        }
    }

    /// Parse a double-quoted string literal. The cursor must be on
    /// the opening `"`. Recognised escapes: `\"`, `\\`, `\n`, `\t`,
    /// `\r`. All other bytes, including non-ASCII UTF-8 sequences,
    /// are copied through unchanged.
    ///
    /// # Errors
    ///
    /// * [`ParseError::UnterminatedString`] (positioned at the
    ///   opening quote) if the input ends before the closing `"`.
    /// * [`ParseError::InvalidEscape`] (positioned at the character
    ///   after the backslash) for an unknown escape.
    fn parse_string(&mut self) -> Result<Sexpr, ParseError> {
        let start = self.pos;
        let line = self.line;
        let col = self.col;
        self.advance(); // consume '"'
        // Collect raw bytes and decode once at the end: pushing each
        // byte as a `char` would turn every byte of a multi-byte
        // UTF-8 sequence into a separate Latin-1 character.
        let mut bytes: Vec<u8> = Vec::new();
        loop {
            let Some(b) = self.peek() else {
                return Err(ParseError::UnterminatedString { line, column: col });
            };
            match b {
                b'"' => {
                    self.advance();
                    let end = self.pos;
                    // The buffer holds runs of the (valid UTF-8)
                    // source split only at ASCII delimiters, plus
                    // ASCII escape results, so it is valid UTF-8.
                    let s = String::from_utf8(bytes).expect("source is utf8");
                    return Ok(Sexpr {
                        kind: SexprKind::String(s),
                        span: Span {
                            start,
                            end,
                            line,
                            column: col,
                        },
                    });
                }
                b'\\' => {
                    self.advance();
                    let Some(esc) = self.peek() else {
                        return Err(ParseError::UnterminatedString { line, column: col });
                    };
                    let unescaped = match esc {
                        b'"' => b'"',
                        b'\\' => b'\\',
                        b'n' => b'\n',
                        b't' => b'\t',
                        b'r' => b'\r',
                        other => {
                            return Err(ParseError::InvalidEscape {
                                ch: self.current_char().unwrap_or(other as char),
                                line: self.line,
                                column: self.col,
                            });
                        }
                    };
                    bytes.push(unescaped);
                    self.advance();
                }
                _ => {
                    bytes.push(b);
                    self.advance();
                }
            }
        }
    }

    /// Parse a symbol whose first byte is under the cursor.
    ///
    /// `start`, `line` and `col` are the position of that first
    /// byte, which the caller has already accepted as a valid symbol
    /// start (ASCII letter, `_`, `-` or `+`). Following bytes are
    /// taken while they are ASCII alphanumerics, `_` or `-`.
    fn parse_symbol(
        &mut self,
        start: usize,
        line: usize,
        col: usize,
    ) -> Result<Sexpr, ParseError> {
        // Always take the caller-vetted first byte. `+` is a legal
        // start but not a legal continuation byte, so relying on the
        // loop alone would return an empty symbol without advancing
        // and callers would spin forever on the same byte.
        self.advance();
        while let Some(b) = self.peek() {
            if b.is_ascii_alphanumeric() || b == b'_' || b == b'-' {
                self.advance();
            } else {
                break;
            }
        }
        let end = self.pos;
        let s = std::str::from_utf8(&self.source[start..end])
            .expect("source is utf8")
            .to_string();
        Ok(Sexpr {
            kind: SexprKind::Symbol(s),
            span: Span {
                start,
                end,
                line,
                column: col,
            },
        })
    }

    /// Parse a number whose first byte (a digit, or a sign directly
    /// followed by a digit) is under the cursor.
    ///
    /// Accepts `[+-]?[0-9]+(\.[0-9]*)?`: at most one decimal point.
    /// A trailing `.` with no fraction digits is still taken as part
    /// of the number so input such as `1.` keeps parsing as before;
    /// a second `.` is left for the caller, which reports it as an
    /// unexpected character. The text is stored verbatim; no numeric
    /// conversion happens here.
    fn parse_number(
        &mut self,
        start: usize,
        line: usize,
        col: usize,
    ) -> Result<Sexpr, ParseError> {
        self.advance(); // sign or first digit, vetted by the caller
        self.consume_digits();
        if self.peek() == Some(b'.') {
            self.advance();
            self.consume_digits();
        }
        let end = self.pos;
        let s = std::str::from_utf8(&self.source[start..end])
            .expect("source is utf8")
            .to_string();
        Ok(Sexpr {
            kind: SexprKind::Number(s),
            span: Span {
                start,
                end,
                line,
                column: col,
            },
        })
    }
}