1#![forbid(unsafe_code)]
3
4use lexical_core as lexical;
5use std::collections::HashMap;
6use std::fmt;
7pub use phf::Map;
8pub use phf::phf_map;
9pub use phf;
10
11#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
16pub struct Symbol(u32);
17
18#[derive(Clone, Debug)]
19pub enum Value {
20 Int(i64),
21 Float(f64),
22 Bool(bool),
23 Str(Symbol),
24 Unit,
25}
26
27impl Value {
28 pub fn as_int(&self) -> Option<i64> {
29 if let Value::Int(i) = self { Some(*i) } else { None }
30 }
31 pub fn as_float(&self) -> Option<f64> {
32 if let Value::Float(f) = self { Some(*f) } else { None }
33 }
34 pub fn as_bool(&self) -> Option<bool> {
35 if let Value::Bool(b) = self { Some(*b) } else { None }
36 }
37}
38
39pub struct Interner {
40 map: HashMap<String, Symbol>,
41 vec: Vec<String>,
42}
43impl Interner {
44 pub fn new() -> Self {
45 Self { map: HashMap::new(), vec: Vec::new() }
46 }
47 pub fn get_or_intern(&mut self, s: &str) -> Symbol {
48 if let Some(&sym) = self.map.get(s) {
49 return sym;
50 }
51 let id = Symbol(self.vec.len() as u32);
52 self.vec.push(s.to_owned());
53 let key_ref: &str = &self.vec[id.0 as usize];
55 self.map.insert(key_ref.to_string(), id);
56 id
57 }
58 pub fn resolve(&self, sym: Symbol) -> &str {
59 &self.vec[sym.0 as usize]
60 }
61}
62
63pub type Builtin<S> = fn(args: &[Value], state: &mut S, interner: &mut Interner) -> Result<Value, RuntimeError>;
64
65#[derive(Debug)]
66pub struct ParseError {
67 pub msg: String,
68 pub pos: usize,
69}
70impl fmt::Display for ParseError {
71 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "parse error at {}: {}", self.pos, self.msg) }
72}
73
74#[derive(Debug)]
75pub struct RuntimeError {
76 pub msg: String,
77}
78impl fmt::Display for RuntimeError {
79 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "runtime error: {}", self.msg) }
80}
81impl std::error::Error for RuntimeError {}
82
83#[derive(Debug)]
84pub enum Error {
85 Parse(ParseError),
86 Runtime(RuntimeError),
87}
88impl fmt::Display for Error {
89 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90 match self {
91 Error::Parse(e) => write!(f, "{e}"),
92 Error::Runtime(e) => write!(f, "{e}"),
93 }
94 }
95}
96impl std::error::Error for Error {}
97impl From<ParseError> for Error { fn from(e: ParseError) -> Self { Error::Parse(e) } }
98impl From<RuntimeError> for Error { fn from(e: RuntimeError) -> Self { Error::Runtime(e) } }
99
100pub struct Interpreter<S:'static> {
101 interner: Interner,
102 builtins: &'static phf::Map<&'static str, Builtin<S>>,
103}
104impl<S> Interpreter<S> {
105 pub fn new(builtins: &'static phf::Map<&'static str, Builtin<S>>) -> Self {
106 Self { interner: Interner::new(), builtins }
107 }
108
109 pub fn run(&mut self, state: &mut S, code: &str) -> Result<(), Error> {
111 let mut parser = Parser::new(code, &mut self.interner)?;
112 let prog = parser.parse_program()?;
113 let mut env: HashMap<Symbol, Value> = HashMap::new();
114 for stmt in &prog.stmts {
115 self.exec_stmt(stmt, state, &mut env)?;
116 }
117 Ok(())
118 }
119
120 pub fn interner(&self) -> &Interner { &self.interner }
122}
123
124#[derive(Clone, Debug)]
128enum Expr {
129 Literal(Value),
130 Var(Symbol),
131 Call(Symbol, Vec<Expr>),
132}
133#[derive(Clone, Debug)]
134enum Stmt {
135 Assign(Symbol, Expr),
136 ExprStmt(Expr),
137 If { cond: Expr, then_blk: Vec<Stmt>, else_blk: Vec<Stmt> },
138 For { var: Symbol, start: Expr, end: Expr, body: Vec<Stmt> }, }
140#[derive(Clone, Debug)]
141struct Program { stmts: Vec<Stmt> }
142
143#[derive(Copy, Clone, Debug, PartialEq, Eq)]
147enum TokKind {
148 Ident,
149 Str,
150 Int,
151 Float,
152 Assign, LParen, RParen, LBrace, RBrace, Comma, RangeDots, If,
160 Else,
161 For,
162 In,
163 Eof,
164}
165#[derive(Clone, Debug)]
166struct Token<'a> {
167 kind: TokKind,
168 pos: usize,
169 text: TokText<'a>,
171}
172#[derive(Clone, Debug)]
173enum TokText<'a> {
174 Slice(&'a str),
175 Owned(String),
176}
177impl<'a> TokText<'a> {
178 fn as_str(&self) -> &str {
179 match self {
180 TokText::Slice(s) => s,
181 TokText::Owned(s) => s.as_str(),
182 }
183 }
184}
185
186struct Lexer<'a> {
187 src: &'a str,
188 bytes: &'a [u8],
189 pos: usize,
190}
191impl<'a> Lexer<'a> {
192 fn new(src: &'a str) -> Self { Self { src, bytes: src.as_bytes(), pos: 0 } }
193 fn peek(&self) -> Option<u8> { self.bytes.get(self.pos).copied() }
194 fn bump(&mut self) -> Option<u8> { let b = self.peek()?; self.pos += 1; Some(b) }
195 fn slice(&self, start: usize, end: usize) -> &'a str { &self.src[start..end] }
196 fn skip_ws_and_comments(&mut self) {
197 loop {
198 while matches!(self.peek(), Some(b) if b.is_ascii_whitespace()) { self.pos += 1; }
199 if self.peek() == Some(b'/') && self.bytes.get(self.pos + 1) == Some(&b'/') {
200 while let Some(b) = self.bump() { if b == b'\n' { break; } }
201 continue;
202 }
203 break;
204 }
205 }
206 fn next_token(&mut self) -> Result<Token<'a>, ParseError> {
207 self.skip_ws_and_comments();
208 let start = self.pos;
209 let text_slice = |me: &Lexer<'a>| TokText::Slice(me.slice(start, me.pos));
210 match self.peek() {
211 None => Ok(Token { kind: TokKind::Eof, pos: self.pos, text: TokText::Slice("") }),
212 Some(b'"') => self.lex_string(),
213 Some(b'(') => { self.pos+=1; Ok(Token { kind: TokKind::LParen, pos: start, text: TokText::Slice("(") }) }
214 Some(b')') => { self.pos+=1; Ok(Token { kind: TokKind::RParen, pos: start, text: TokText::Slice(")") }) }
215 Some(b'{') => { self.pos+=1; Ok(Token { kind: TokKind::LBrace, pos: start, text: TokText::Slice("{") }) }
216 Some(b'}') => { self.pos+=1; Ok(Token { kind: TokKind::RBrace, pos: start, text: TokText::Slice("}") }) }
217 Some(b',') => { self.pos+=1; Ok(Token { kind: TokKind::Comma, pos: start, text: TokText::Slice(",") }) }
218 Some(b'=') => { self.pos+=1; Ok(Token { kind: TokKind::Assign, pos: start, text: TokText::Slice("=") }) }
219 Some(b'.') => {
220 if self.bytes.get(self.pos+1) == Some(&b'.') {
221 self.pos += 2;
222 Ok(Token { kind: TokKind::RangeDots, pos: start, text: TokText::Slice("..") })
223 } else {
224 self.lex_number()
225 }
226 }
227 Some(b) if b.is_ascii_alphabetic() || b == b'_' => self.lex_ident_or_kw(),
228 Some(b'-') | Some(b'0'..=b'9') => self.lex_number(),
229 _ => Err(ParseError { msg: "unexpected character".into(), pos: start }),
230 }
231 .map(|mut t| { if matches!(t.kind, TokKind::Ident | TokKind::Int | TokKind::Float) && matches!(t.text, TokText::Slice("")) { t.text = text_slice(self); } t })
232 }
233 fn lex_ident_or_kw(&mut self) -> Result<Token<'a>, ParseError> {
234 let start = self.pos;
235 while let Some(b) = self.peek() {
236 if b.is_ascii_alphanumeric() || b == b'_' { self.pos += 1; } else { break; }
237 }
238 let s = self.slice(start, self.pos);
239 let kind = match s {
240 "if" => TokKind::If,
241 "else" => TokKind::Else,
242 "for" => TokKind::For,
243 "in" => TokKind::In,
244 _ => TokKind::Ident,
245 };
246 Ok(Token { kind, pos: start, text: TokText::Slice(s) })
247 }
248 fn lex_string(&mut self) -> Result<Token<'a>, ParseError> {
249 let start = self.pos;
250 self.bump(); let mut out = String::new();
252 while let Some(b) = self.bump() {
253 match b {
254 b'"' => return Ok(Token { kind: TokKind::Str, pos: start, text: TokText::Owned(out) }),
255 b'\\' => {
256 let esc = self.bump().ok_or(ParseError { msg: "unterminated escape".into(), pos: self.pos })?;
257 match esc {
258 b'"' => out.push('"'),
259 b'\\' => out.push('\\'),
260 b'n' => out.push('\n'),
261 b't' => out.push('\t'),
262 b'r' => out.push('\r'),
263 _ => return Err(ParseError { msg: "invalid escape".into(), pos: self.pos }),
264 }
265 }
266 _ => out.push(b as char),
267 }
268 }
269 Err(ParseError { msg: "unterminated string".into(), pos: start })
270 }
271 fn lex_number(&mut self) -> Result<Token<'a>, ParseError> {
272 let start = self.pos;
273 if self.peek() == Some(b'-') { self.pos += 1; }
274 let mut has_dot = false;
275 let mut has_exp = false;
276 while let Some(b) = self.peek() {
277 match b {
278 b'0'..=b'9' => self.pos += 1,
279 b'.' => { has_dot = true; self.pos += 1; }
280 b'e' | b'E' => { has_exp = true; self.pos += 1; if matches!(self.peek(), Some(b'+') | Some(b'-')) { self.pos += 1; } }
281 _ => break,
282 }
283 }
284 let s = &self.bytes[start..self.pos];
285 if has_dot || has_exp {
286 lexical::parse::<f64>(s).map_err(|_| ParseError { msg: "invalid float".into(), pos: start })?;
287 Ok(Token { kind: TokKind::Float, pos: start, text: TokText::Slice(self.slice(start, self.pos)) })
288 } else {
289 lexical::parse::<i64>(s).map_err(|_| ParseError { msg: "invalid int".into(), pos: start })?;
290 Ok(Token { kind: TokKind::Int, pos: start, text: TokText::Slice(self.slice(start, self.pos)) })
291 }
292 }
293}
294
295struct Parser<'a, 'i> {
299 lex: Lexer<'a>,
300 cur: Token<'a>,
301 interner: &'i mut Interner,
302}
303impl<'a, 'i> Parser<'a, 'i> {
304 fn new(src: &'a str, interner: &'i mut Interner) -> Result<Self, ParseError> {
305 let mut lex = Lexer::new(src);
306 let cur = lex.next_token()?;
307 Ok(Self { lex, cur, interner })
308 }
309 fn bump(&mut self) -> Result<(), ParseError> { self.cur = self.lex.next_token()?; Ok(()) }
310 fn expect(&mut self, kind: TokKind) -> Result<(), ParseError> {
311 if self.cur.kind == kind { self.bump() } else {
312 Err(ParseError { msg: format!("expected {:?}, found {:?}", kind, self.cur.kind), pos: self.cur.pos })
313 }
314 }
315 fn parse_program(&mut self) -> Result<Program, ParseError> {
316 let mut stmts = Vec::new();
317 while self.cur.kind != TokKind::Eof {
318 stmts.push(self.parse_stmt()?);
319 }
320 Ok(Program { stmts })
321 }
322 fn parse_stmt(&mut self) -> Result<Stmt, ParseError> {
323 match self.cur.kind {
324 TokKind::If => self.parse_if(),
325 TokKind::For => self.parse_for(),
326 TokKind::Ident => {
327 let name_sym = self.interner.get_or_intern(self.cur.text.as_str());
329 self.bump()?;
330 if self.cur.kind == TokKind::Assign {
331 self.bump()?;
332 let expr = self.parse_expr()?;
333 Ok(Stmt::Assign(name_sym, expr))
334 } else if self.cur.kind == TokKind::LParen {
335 let call = self.finish_call(name_sym)?;
336 Ok(Stmt::ExprStmt(call))
337 } else {
338 Err(ParseError { msg: "expected '=' or '(' after identifier".into(), pos: self.cur.pos })
339 }
340 }
341 _ => {
342 let expr = self.parse_expr()?;
344 Ok(Stmt::ExprStmt(expr))
345 }
346 }
347 }
348 fn parse_if(&mut self) -> Result<Stmt, ParseError> {
349 self.expect(TokKind::If)?;
350 let cond = self.parse_expr()?;
351 let then_blk = self.parse_block()?;
352 let else_blk = if self.cur.kind == TokKind::Else {
353 self.bump()?;
354 self.parse_block()?
355 } else { Vec::new() };
356 Ok(Stmt::If { cond, then_blk, else_blk })
357 }
358 fn parse_for(&mut self) -> Result<Stmt, ParseError> {
359 self.expect(TokKind::For)?;
360 let var = if let TokKind::Ident = self.cur.kind {
361 let sym = self.interner.get_or_intern(self.cur.text.as_str());
362 self.bump()?;
363 sym
364 } else {
365 return Err(ParseError { msg: "expected loop variable".into(), pos: self.cur.pos });
366 };
367 self.expect(TokKind::In)?;
368 let start = self.parse_expr()?;
369 self.expect(TokKind::RangeDots)?;
370 let end = self.parse_expr()?;
371 let body = self.parse_block()?;
372 Ok(Stmt::For { var, start, end, body })
373 }
374 fn parse_block(&mut self) -> Result<Vec<Stmt>, ParseError> {
375 self.expect(TokKind::LBrace)?;
376 let mut stmts = Vec::new();
377 while self.cur.kind != TokKind::RBrace {
378 if self.cur.kind == TokKind::Eof {
379 return Err(ParseError { msg: "unterminated block".into(), pos: self.cur.pos });
380 }
381 stmts.push(self.parse_stmt()?);
382 }
383 self.expect(TokKind::RBrace)?;
384 Ok(stmts)
385 }
386 fn parse_expr(&mut self) -> Result<Expr, ParseError> {
387 match self.cur.kind {
388 TokKind::Int => {
389 let s = self.cur.text.as_str();
390 let i = lexical::parse::<i64>(s.as_bytes()).unwrap();
391 self.bump()?;
392 Ok(Expr::Literal(Value::Int(i)))
393 }
394 TokKind::Float => {
395 let s = self.cur.text.as_str();
396 let f = lexical::parse::<f64>(s.as_bytes()).unwrap();
397 self.bump()?;
398 Ok(Expr::Literal(Value::Float(f)))
399 }
400 TokKind::Str => {
401 let s = self.cur.text.as_str();
402 let sym = self.interner.get_or_intern(s);
403 self.bump()?;
404 Ok(Expr::Literal(Value::Str(sym)))
405 }
406 TokKind::Ident => {
407 let name = self.interner.get_or_intern(self.cur.text.as_str());
408 self.bump()?;
409 if self.cur.kind == TokKind::LParen {
410 self.finish_call(name)
411 } else {
412 Ok(Expr::Var(name))
413 }
414 }
415 _ => Err(ParseError { msg: "expected expression".into(), pos: self.cur.pos }),
416 }
417 }
418 fn finish_call(&mut self, func: Symbol) -> Result<Expr, ParseError> {
419 self.expect(TokKind::LParen)?;
420 let mut args = Vec::new();
421 if self.cur.kind != TokKind::RParen {
422 loop {
423 args.push(self.parse_expr()?);
424 if self.cur.kind == TokKind::Comma {
425 self.bump()?;
426 continue;
427 }
428 break;
429 }
430 }
431 self.expect(TokKind::RParen)?;
432 Ok(Expr::Call(func, args))
433 }
434}
435
436impl<S> Interpreter<S> {
440 fn exec_block(&mut self, blk: &[Stmt], state: &mut S, env: &mut HashMap<Symbol, Value>) -> Result<(), RuntimeError> {
441 for stmt in blk { self.exec_stmt(stmt, state, env)?; }
442 Ok(())
443 }
444 fn exec_stmt(&mut self, stmt: &Stmt, state: &mut S, env: &mut HashMap<Symbol, Value>) -> Result<(), RuntimeError> {
445 match stmt {
446 Stmt::Assign(name, expr) => {
447 let v = self.eval_expr(expr, state, env)?;
448 env.insert(*name, v);
449 Ok(())
450 }
451 Stmt::ExprStmt(expr) => {
452 let _ = self.eval_expr(expr, state, env)?;
453 Ok(())
454 }
455 Stmt::If { cond, then_blk, else_blk } => {
456 let c = self.eval_expr(cond, state, env)?;
457 match c {
458 Value::Bool(true) => self.exec_block(then_blk, state, env),
459 Value::Bool(false) => self.exec_block(else_blk, state, env),
460 _ => Err(RuntimeError { msg: "if condition must be bool".into() }),
461 }
462 }
463 Stmt::For { var, start, end, body } => {
464 let s = self.eval_expr(start, state, env)?;
465 let e = self.eval_expr(end, state, env)?;
466 let (Some(mut s), Some(e)) = (s.as_int(), e.as_int()) else {
467 return Err(RuntimeError { msg: "for range must be integers".into() });
468 };
469 while s <= e {
470 env.insert(*var, Value::Int(s));
471 self.exec_block(body, state, env)?;
472 s += 1;
473 }
474 Ok(())
475 }
476 }
477 }
478 fn eval_expr(&mut self, expr: &Expr, state: &mut S, env: &mut HashMap<Symbol, Value>) -> Result<Value, RuntimeError> {
479 match expr {
480 Expr::Literal(v) => Ok(v.clone()),
481 Expr::Var(sym) => {
482 env.get(sym).cloned().ok_or_else(|| RuntimeError { msg: format!("undefined variable '{}'", self.interner.resolve(*sym)) })
483 }
484 Expr::Call(func_sym, args_exprs) => {
485 let mut args = Vec::with_capacity(args_exprs.len());
486 for e in args_exprs { args.push(self.eval_expr(e, state, env)?); }
487 let func_name = self.interner.resolve(*func_sym);
488 if let Some(f) = self.builtins.get(func_name) {
489 f(&args, state, &mut self.interner)
490 } else {
491 Err(RuntimeError { msg: format!("unknown function '{}'", func_name) })
492 }
493 }
494 }
495 }
496}
497
498#[cfg(test)]
502mod tests {
503 use std::time::Instant;
504
505 use super::*;
506 use phf::phf_map;
507
508 #[derive(Default)]
509 struct State {
510 out: String,
511 }
512
513 fn add(args: &[Value], _st: &mut State, _i: &mut Interner) -> Result<Value, RuntimeError> {
514 if args.len() != 2 { return Err(RuntimeError { msg: "add expects 2 args".into() }); }
515 match (&args[0], &args[1]) {
516 (Value::Int(a), Value::Int(b)) => Ok(Value::Int(a + b)),
517 (Value::Float(a), Value::Float(b)) => Ok(Value::Float(a + b)),
518 (Value::Int(a), Value::Float(b)) => Ok(Value::Float(*a as f64 + *b)),
519 (Value::Float(a), Value::Int(b)) => Ok(Value::Float(*a + *b as f64)),
520 _ => Err(RuntimeError { msg: "add expects numbers".into() }),
521 }
522 }
523
524 fn equals(args: &[Value], _st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
525 if args.len() != 2 { return Err(RuntimeError { msg: "equals expects 2 args".into() }); }
526 let eq = match (&args[0], &args[1]) {
527 (Value::Int(a), Value::Int(b)) => a == b,
528 (Value::Float(a), Value::Float(b)) => a == b,
529 (Value::Int(a), Value::Float(b)) => (*a as f64) == *b,
530 (Value::Float(a), Value::Int(b)) => *a == (*b as f64),
531 (Value::Bool(a), Value::Bool(b)) => a == b,
532 (Value::Str(sa), Value::Str(sb)) => i.resolve(*sa) == i.resolve(*sb),
533 _ => false,
534 };
535 Ok(Value::Bool(eq))
536 }
537
538 fn to_str(args: &[Value], _st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
539 if args.len() != 1 { return Err(RuntimeError { msg: "to_str expects 1 arg".into() }); }
540 let s = match &args[0] {
541 Value::Int(v) => v.to_string(),
542 Value::Float(v) => v.to_string(),
543 Value::Bool(v) => v.to_string(),
544 Value::Str(sym) => i.resolve(*sym).to_string(),
545 Value::Unit => "unit".to_string(),
546 };
547 Ok(Value::Str(i.get_or_intern(&s)))
548 }
549
550 fn concat(args: &[Value], _st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
551 if args.len() != 2 { return Err(RuntimeError { msg: "concat expects 2 args".into() }); }
552 let a = match &args[0] { Value::Str(s) => i.resolve(*s), _ => return Err(RuntimeError { msg: "concat arg 1 must be str".into() }) };
553 let b = match &args[1] { Value::Str(s) => i.resolve(*s), _ => return Err(RuntimeError { msg: "concat arg 2 must be str".into() }) };
554 let mut s = String::with_capacity(a.len() + b.len());
555 s.push_str(a); s.push_str(b);
556 Ok(Value::Str(i.get_or_intern(&s)))
557 }
558
559 fn print(args: &[Value], st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
560 for (idx, v) in args.iter().enumerate() {
561 if idx > 0 { st.out.push(' '); }
562 match v {
563 Value::Int(x) => st.out.push_str(&x.to_string()),
564 Value::Float(x) => st.out.push_str(&x.to_string()),
565 Value::Bool(x) => st.out.push_str(&x.to_string()),
566 Value::Str(sym) => st.out.push_str(i.resolve(*sym)),
567 Value::Unit => st.out.push_str("unit"),
568 }
569 }
570 st.out.push('\n');
571 Ok(Value::Unit)
572 }
573
574 static BUILTINS: phf::Map<&'static str, Builtin<State>> = phf_map! {
575 "add" => add as Builtin<State>,
576 "equals" => equals as Builtin<State>,
577 "to_str" => to_str as Builtin<State>,
578 "concat" => concat as Builtin<State>,
579 "print" => print as Builtin<State>,
580 };
581
582 #[test]
583 fn runs_sample() {
584 let code = r#"
585a = 0
586b = 1
587c = add(a, b)
588print(c)
589
590if equals(c, 1) {
591 print("one")
592} else {
593 print("not one")
594}
595
596sum = 0
597for i in 1 .. 5 {
598 sum = add(sum, i)
599}
600print(concat("sum = ", to_str(15)))
601"#;
602 let a = Instant::now();
603 let mut interp = Interpreter::<State>::new(&BUILTINS);
604 let mut st = State::default();
605 interp.run(&mut st, code).unwrap();
606 assert!(st.out.contains("1"));
607 assert!(st.out.contains("one"));
608 assert!(st.out.contains("sum = 15"));
609 println!("this many microseconds have passed {}",a.elapsed().as_micros())
610 }
611}