mazer_core/
parser.rs

1use colored::Colorize;
2use regex::Regex;
3use std::fmt;
4
5use crate::interpreter::Environment;
6
/// Error value carrying a single human-readable message.
pub struct LispErr {
    /// Text describing what went wrong.
    message: String,
}
10
11impl fmt::Debug for LispErr {
12    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
13        write!(f, "{} {}", "[ERROR]".red().bold(), self.message)
14    }
15}
16
17impl fmt::Display for LispErr {
18    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
19        write!(f, "{} {}", "[ERRPR]".red().bold(), self.message)
20    }
21}
22
23impl From<LispErr> for String {
24    fn from(err: LispErr) -> Self {
25        err.message
26    }
27}
28
29impl LispErr {
30    pub fn new(message: &str) -> Self {
31        LispErr {
32            message: message.to_string(),
33        }
34    }
35}
36
37#[derive(Debug, Clone, PartialEq)]
38pub enum LispExpr {
39    Number(f64),
40    String(String),
41    Symbol(String),
42    Boolean(bool),
43    List(Vec<LispExpr>),
44    Nil,
45    Function(fn(Vec<LispExpr>, &mut Environment) -> Result<LispExpr, LispErr>),
46}
47
48impl fmt::Display for LispExpr {
49    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
50        match self {
51            LispExpr::Number(n) => write!(f, "{}", n),
52            LispExpr::String(s) => write!(f, "\"{}\"", s),
53            LispExpr::Symbol(s) => write!(f, "{}", s),
54            LispExpr::Boolean(b) => write!(f, "{}", b),
55            LispExpr::Nil => write!(f, "nil"),
56            LispExpr::List(list) => {
57                write!(f, "(")?;
58                for (i, expr) in list.iter().enumerate() {
59                    if i > 0 {
60                        write!(f, " ")?;
61                    }
62                    write!(f, "{}", expr)?;
63                }
64                write!(f, ")")
65            }
66            LispExpr::Function(_) => write!(f, "<function>"),
67        }
68    }
69}
70
/// Wraps `$content` in a `<math>` element carrying the MathML namespace.
///
/// `$content` is any expression that can be formatted with `{}`; the macro
/// expands to a `format!` call and therefore evaluates to a `String`.
#[macro_export]
macro_rules! wrap_mathml {
    ($content:expr) => {
        format!(
            "<math xmlns=\"http://www.w3.org/1998/Math/MathML\">{body}</math>",
            body = $content
        )
    };
}
80
/// Newtype wrapper around a `String` of MathML markup.
#[derive(Debug)]
pub struct MathML(String);
83
84impl MathML {
85    pub fn new(src: String) -> Self {
86        MathML(src)
87    }
88
89    pub fn to_string(&self) -> String {
90        self.0.clone()
91    }
92}
93
94impl From<String> for MathML {
95    fn from(content: String) -> Self {
96        MathML(wrap_mathml!(content))
97    }
98}
99
100impl From<&LispExpr> for MathML {
101    fn from(expr: &LispExpr) -> Self {
102        let expr = expr.clone();
103        match expr {
104            LispExpr::Function(_) => MathML::new("<mrow>Error: function in expression</mrow>".to_string()),
105            LispExpr::Number(n) => format!("<mn>{}</mn>", n).into(),
106            LispExpr::Symbol(s) => format!("<mi>{}</mi>", s).into(),
107            LispExpr::String(s) => format!("<mtext>{}</mtext>", s).into(),
108            LispExpr::Boolean(b) => format!("<mn>{}</mn>", b).into(),
109            LispExpr::Nil => "<mi>nil</mi>".to_string().into(),
110            LispExpr::List(list) => {
111                if list.is_empty() {
112                    return MathML::new(String::new());
113                }
114
115                if let LispExpr::Symbol(operator) = &list[0] {
116                    let args = &list[1..];
117                    match operator.as_str() {
118                        "+" => MathML::addition(args),
119                        "-" => MathML::subtraction(args),
120                        "*" => MathML::multiplication(args),
121                        "/" => MathML::division(args),
122                        "matrix"=> MathML::matrix(args),
123                        _ => unimplemented!("From<&LispExpr> for MathML: operator `{}` not implemented", operator),
124                    }
125                } else {
126                    return MathML::new("<mrow>Error: first element of a list must be a symbol</mrow>".to_string());
127                }
128            }
129        }
130    }
131}
132
133
134pub struct Parser {
135    src: String,
136    tokens: Vec<String>,
137    ast: Vec<LispExpr>,
138}
139
140impl Parser {
141    pub fn new(src: String) -> Self {
142        let token = Parser::tokenize(&src);
143
144        Parser {
145            src: src,
146            tokens: token,
147            ast: Vec::new(),
148        }
149    }
150
151    /// This is used when a lisp expression is within a fmt or eval
152    /// call. We need to wrap it in parentheses to ensure it's
153    /// treated as a single expression. Else will simply get back
154    /// the first token or equivalent.
155    /// Caller's responsibility to ensure the string is a valid
156    /// Caller must call wrap_parens before the .parse() method.
157    /// This also prevents imho the rather ugly redundant and repeated
158    /// parens like so: fmt((expr)) when you can simply write fmt(expr)
159    /// NOTE: does not check for balances parenthesis
160    pub fn wrap_parens_safely(src: String) -> String {
161        let src = src.trim();
162        if src.starts_with('(') && src.ends_with(')') {
163            src.to_string()
164        } else {
165            format!("({})", src)
166        }
167    }
168
169    pub fn append_tokens(&mut self, src: String) {
170        let token = Parser::tokenize(&src);
171        self.tokens.extend(token);
172    }
173
174    /// This regular expression is used for tokenizing a Lisp-like language.
175    /// 
176    /// It matches and captures different types of tokens, including:
177    /// 
178    /// - **Whitespace and commas** (`[\s,]*`)  
179    ///   - These are ignored as separators.
180    /// 
181    /// - **Special symbols** (`~@|[\[\]{}()'`~^@]`)  
182    ///   - Matches Lisp syntax elements like `(`, `)`, `[`, `]`, `{`, `}`, `'`, `` ` ``, `~`, `@`, `^`, and `~@`.
183    /// 
184    /// - **String literals** (`"(?:\\.|[^\\"])*"?`)  
185    ///   - Matches double-quoted strings, allowing for escaped characters (e.g., `"hello"`, `"escaped \" quote"`).
186    ///   - The trailing `"?` allows capturing an incomplete string (e.g., `"unterminated`).
187    /// 
188    /// - **Comments** (`;.*`)  
189    ///   - Matches Lisp-style comments starting with `;` and continuing to the end of the line.
190    /// 
191    /// - **Identifiers and other tokens** (`[^\s\[\]{}('"`,;)]*`)  
192    ///   - Matches symbols, numbers, and variable names, ensuring they don't include special characters.
193    pub fn tokenize(src: &str) -> Vec<String> {
194        let regex =
195            Regex::new(r#"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)"#)
196                .expect("regex should always compile");
197        let mut results = Vec::with_capacity(1024);
198
199        for capture in regex.captures_iter(src) {
200            let token = capture.get(1).unwrap().as_str();
201            if token.is_empty() || token.starts_with(';') {
202                continue; // skip empty tokens and comments
203            }
204            results.push(token.to_string());
205        }
206
207        results
208    }
209
210    pub fn parse(&mut self) -> LispExpr {
211        let tokens = Parser::tokenize(&self.tokens.join(" "));
212        let (expr, _) = Parser::parse_tokens(&tokens, 0);
213        self.ast.push(expr.clone());
214        expr
215    }
216
217    pub fn ast(&self) -> Vec<LispExpr> {
218        self.ast.clone()
219    }
220
221    pub fn source(&self) -> &str {
222        &self.src
223    }
224
225    fn parse_tokens(tokens: &[String], start_index: usize) -> (LispExpr, usize) {
226        if start_index >= tokens.len() {
227            return (LispExpr::Nil, start_index);
228        }
229
230        let token = &tokens[start_index];
231
232        if token == "(" {
233            let mut list = Vec::new();
234            let mut idx = start_index + 1;
235
236            while idx < tokens.len() && tokens[idx] != ")" {
237                let (expr, next_idx) = Parser::parse_tokens(tokens, idx);
238                list.push(expr);
239                idx = next_idx;
240            }
241
242            // Skip the closing parenthesis
243            idx = if idx < tokens.len() { idx + 1 } else { idx };
244
245            return (LispExpr::List(list), idx);
246        } else {
247            (Parser::parse_atom(token), start_index + 1)
248        }
249    }
250
251    fn parse_atom(token: &str) -> LispExpr {
252        // Handle strings
253        if token.starts_with('"') {
254            let content = if token.ends_with('"') && token.len() > 1 {
255                &token[1..token.len() - 1]
256            } else {
257                &token[1..]
258            };
259            return LispExpr::String(content.to_string());
260        }
261
262        // Handle numbers
263        if let Ok(num) = token.parse::<f64>() {
264            return LispExpr::Number(num);
265        }
266
267        // Handle booleans and nil
268        match token {
269            "true" => return LispExpr::Boolean(true),
270            "false" => return LispExpr::Boolean(false),
271            "nil" => return LispExpr::Nil,
272            _ => {}
273        }
274
275        // Otherwise it's a symbol
276        LispExpr::Symbol(token.to_string())
277    }
278}
279