mikro/
lib.rs

1// src/lib.rs
2#![forbid(unsafe_code)]
3
4use lexical_core as lexical;
5use std::collections::HashMap;
6use std::fmt;
7pub use phf::Map;
8pub use phf::phf_map;
9pub use phf;
10
11//
12// Public API
13//
14
15#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
16pub struct Symbol(u32);
17
18#[derive(Clone, Debug)]
19pub enum Value {
20    Int(i64),
21    Float(f64),
22    Bool(bool),
23    Str(Symbol),
24    Unit,
25}
26
27impl Value {
28    pub fn as_int(&self) -> Option<i64> {
29        if let Value::Int(i) = self { Some(*i) } else { None }
30    }
31    pub fn as_float(&self) -> Option<f64> {
32        if let Value::Float(f) = self { Some(*f) } else { None }
33    }
34    pub fn as_bool(&self) -> Option<bool> {
35        if let Value::Bool(b) = self { Some(*b) } else { None }
36    }
37}
38
39pub struct Interner {
40    map: HashMap<String, Symbol>,
41    vec: Vec<String>,
42}
43impl Interner {
44    pub fn new() -> Self {
45        Self { map: HashMap::new(), vec: Vec::new() }
46    }
47    pub fn get_or_intern(&mut self, s: &str) -> Symbol {
48        if let Some(&sym) = self.map.get(s) {
49            return sym;
50        }
51        let id = Symbol(self.vec.len() as u32);
52        self.vec.push(s.to_owned());
53        // Reuse the inserted buffer as the map key without extra allocation.
54        let key_ref: &str = &self.vec[id.0 as usize];
55        self.map.insert(key_ref.to_string(), id);
56        id
57    }
58    pub fn resolve(&self, sym: Symbol) -> &str {
59        &self.vec[sym.0 as usize]
60    }
61}
62
63pub type Builtin<S> = fn(args: &[Value], state: &mut S, interner: &mut Interner) -> Result<Value, RuntimeError>;
64
65#[derive(Debug)]
66pub struct ParseError {
67    pub msg: String,
68    pub pos: usize,
69}
70impl fmt::Display for ParseError {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "parse error at {}: {}", self.pos, self.msg) }
72}
73
74#[derive(Debug)]
75pub struct RuntimeError {
76    pub msg: String,
77}
78impl fmt::Display for RuntimeError {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "runtime error: {}", self.msg) }
80}
81impl std::error::Error for RuntimeError {}
82
83#[derive(Debug)]
84pub enum Error {
85    Parse(ParseError),
86    Runtime(RuntimeError),
87}
88impl fmt::Display for Error {
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90        match self {
91            Error::Parse(e) => write!(f, "{e}"),
92            Error::Runtime(e) => write!(f, "{e}"),
93        }
94    }
95}
96impl std::error::Error for Error {}
97impl From<ParseError> for Error { fn from(e: ParseError) -> Self { Error::Parse(e) } }
98impl From<RuntimeError> for Error { fn from(e: RuntimeError) -> Self { Error::Runtime(e) } }
99
100pub struct Interpreter<S:'static> {
101    interner: Interner,
102    builtins: &'static phf::Map<&'static str, Builtin<S>>,
103}
104impl<S> Interpreter<S> {
105    pub fn new(builtins: &'static phf::Map<&'static str, Builtin<S>>) -> Self {
106        Self { interner: Interner::new(), builtins }
107    }
108
109    // Direct interpretation: parse and execute immediately.
110    pub fn run(&mut self, state: &mut S, code: &str) -> Result<(), Error> {
111        let mut parser = Parser::new(code, &mut self.interner)?;
112        let prog = parser.parse_program()?;
113        let mut env: HashMap<Symbol, Value> = HashMap::new();
114        for stmt in &prog.stmts {
115            self.exec_stmt(stmt, state, &mut env)?;
116        }
117        Ok(())
118    }
119
120    // Optional: expose the interner for advanced hosts (read-only).
121    pub fn interner(&self) -> &Interner { &self.interner }
122}
123
124//
125// Internal AST
126//
127#[derive(Clone, Debug)]
128enum Expr {
129    Literal(Value),
130    Var(Symbol),
131    Call(Symbol, Vec<Expr>),
132}
133#[derive(Clone, Debug)]
134enum Stmt {
135    Assign(Symbol, Expr),
136    ExprStmt(Expr),
137    If { cond: Expr, then_blk: Vec<Stmt>, else_blk: Vec<Stmt> },
138    For { var: Symbol, start: Expr, end: Expr, body: Vec<Stmt> }, // inclusive 1 .. 5
139}
140#[derive(Clone, Debug)]
141struct Program { stmts: Vec<Stmt> }
142
143//
144// Lexer
145//
146#[derive(Copy, Clone, Debug, PartialEq, Eq)]
147enum TokKind {
148    Ident,
149    Str,
150    Int,
151    Float,
152    Assign,       // =
153    LParen,       // (
154    RParen,       // )
155    LBrace,       // {
156    RBrace,       // }
157    Comma,        // ,
158    RangeDots,    // ..
159    If,
160    Else,
161    For,
162    In,
163    Eof,
164}
165#[derive(Clone, Debug)]
166struct Token<'a> {
167    kind: TokKind,
168    pos: usize,
169    // Borrowed for id/num; owned for unescaped string literals.
170    text: TokText<'a>,
171}
172#[derive(Clone, Debug)]
173enum TokText<'a> {
174    Slice(&'a str),
175    Owned(String),
176}
177impl<'a> TokText<'a> {
178    fn as_str(&self) -> &str {
179        match self {
180            TokText::Slice(s) => s,
181            TokText::Owned(s) => s.as_str(),
182        }
183    }
184}
185
186struct Lexer<'a> {
187    src: &'a str,
188    bytes: &'a [u8],
189    pos: usize,
190}
191impl<'a> Lexer<'a> {
192    fn new(src: &'a str) -> Self { Self { src, bytes: src.as_bytes(), pos: 0 } }
193    fn peek(&self) -> Option<u8> { self.bytes.get(self.pos).copied() }
194    fn bump(&mut self) -> Option<u8> { let b = self.peek()?; self.pos += 1; Some(b) }
195    fn slice(&self, start: usize, end: usize) -> &'a str { &self.src[start..end] }
196    fn skip_ws_and_comments(&mut self) {
197        loop {
198            while matches!(self.peek(), Some(b) if b.is_ascii_whitespace()) { self.pos += 1; }
199            if self.peek() == Some(b'/') && self.bytes.get(self.pos + 1) == Some(&b'/') {
200                while let Some(b) = self.bump() { if b == b'\n' { break; } }
201                continue;
202            }
203            break;
204        }
205    }
206    fn next_token(&mut self) -> Result<Token<'a>, ParseError> {
207        self.skip_ws_and_comments();
208        let start = self.pos;
209        let text_slice = |me: &Lexer<'a>| TokText::Slice(me.slice(start, me.pos));
210        match self.peek() {
211            None => Ok(Token { kind: TokKind::Eof, pos: self.pos, text: TokText::Slice("") }),
212            Some(b'"') => self.lex_string(),
213            Some(b'(') => { self.pos+=1; Ok(Token { kind: TokKind::LParen, pos: start, text: TokText::Slice("(") }) }
214            Some(b')') => { self.pos+=1; Ok(Token { kind: TokKind::RParen, pos: start, text: TokText::Slice(")") }) }
215            Some(b'{') => { self.pos+=1; Ok(Token { kind: TokKind::LBrace, pos: start, text: TokText::Slice("{") }) }
216            Some(b'}') => { self.pos+=1; Ok(Token { kind: TokKind::RBrace, pos: start, text: TokText::Slice("}") }) }
217            Some(b',') => { self.pos+=1; Ok(Token { kind: TokKind::Comma, pos: start, text: TokText::Slice(",") }) }
218            Some(b'=') => { self.pos+=1; Ok(Token { kind: TokKind::Assign, pos: start, text: TokText::Slice("=") }) }
219            Some(b'.') => {
220                if self.bytes.get(self.pos+1) == Some(&b'.') {
221                    self.pos += 2;
222                    Ok(Token { kind: TokKind::RangeDots, pos: start, text: TokText::Slice("..") })
223                } else {
224                    self.lex_number()
225                }
226            }
227            Some(b) if b.is_ascii_alphabetic() || b == b'_' => self.lex_ident_or_kw(),
228            Some(b'-') | Some(b'0'..=b'9') => self.lex_number(),
229            _ => Err(ParseError { msg: "unexpected character".into(), pos: start }),
230        }
231        .map(|mut t| { if matches!(t.kind, TokKind::Ident | TokKind::Int | TokKind::Float) && matches!(t.text, TokText::Slice("")) { t.text = text_slice(self); } t })
232    }
233    fn lex_ident_or_kw(&mut self) -> Result<Token<'a>, ParseError> {
234        let start = self.pos;
235        while let Some(b) = self.peek() {
236            if b.is_ascii_alphanumeric() || b == b'_' { self.pos += 1; } else { break; }
237        }
238        let s = self.slice(start, self.pos);
239        let kind = match s {
240            "if" => TokKind::If,
241            "else" => TokKind::Else,
242            "for" => TokKind::For,
243            "in" => TokKind::In,
244            _ => TokKind::Ident,
245        };
246        Ok(Token { kind, pos: start, text: TokText::Slice(s) })
247    }
248    fn lex_string(&mut self) -> Result<Token<'a>, ParseError> {
249        let start = self.pos;
250        self.bump(); // consume opening "
251        let mut out = String::new();
252        while let Some(b) = self.bump() {
253            match b {
254                b'"' => return Ok(Token { kind: TokKind::Str, pos: start, text: TokText::Owned(out) }),
255                b'\\' => {
256                    let esc = self.bump().ok_or(ParseError { msg: "unterminated escape".into(), pos: self.pos })?;
257                    match esc {
258                        b'"' => out.push('"'),
259                        b'\\' => out.push('\\'),
260                        b'n' => out.push('\n'),
261                        b't' => out.push('\t'),
262                        b'r' => out.push('\r'),
263                        _ => return Err(ParseError { msg: "invalid escape".into(), pos: self.pos }),
264                    }
265                }
266                _ => out.push(b as char),
267            }
268        }
269        Err(ParseError { msg: "unterminated string".into(), pos: start })
270    }
271    fn lex_number(&mut self) -> Result<Token<'a>, ParseError> {
272        let start = self.pos;
273        if self.peek() == Some(b'-') { self.pos += 1; }
274        let mut has_dot = false;
275        let mut has_exp = false;
276        while let Some(b) = self.peek() {
277            match b {
278                b'0'..=b'9' => self.pos += 1,
279                b'.' => { has_dot = true; self.pos += 1; }
280                b'e' | b'E' => { has_exp = true; self.pos += 1; if matches!(self.peek(), Some(b'+') | Some(b'-')) { self.pos += 1; } }
281                _ => break,
282            }
283        }
284        let s = &self.bytes[start..self.pos];
285        if has_dot || has_exp {
286            lexical::parse::<f64>(s).map_err(|_| ParseError { msg: "invalid float".into(), pos: start })?;
287            Ok(Token { kind: TokKind::Float, pos: start, text: TokText::Slice(self.slice(start, self.pos)) })
288        } else {
289            lexical::parse::<i64>(s).map_err(|_| ParseError { msg: "invalid int".into(), pos: start })?;
290            Ok(Token { kind: TokKind::Int, pos: start, text: TokText::Slice(self.slice(start, self.pos)) })
291        }
292    }
293}
294
295//
296// Parser
297//
298struct Parser<'a, 'i> {
299    lex: Lexer<'a>,
300    cur: Token<'a>,
301    interner: &'i mut Interner,
302}
303impl<'a, 'i> Parser<'a, 'i> {
304    fn new(src: &'a str, interner: &'i mut Interner) -> Result<Self, ParseError> {
305        let mut lex = Lexer::new(src);
306        let cur = lex.next_token()?;
307        Ok(Self { lex, cur, interner })
308    }
309    fn bump(&mut self) -> Result<(), ParseError> { self.cur = self.lex.next_token()?; Ok(()) }
310    fn expect(&mut self, kind: TokKind) -> Result<(), ParseError> {
311        if self.cur.kind == kind { self.bump() } else {
312            Err(ParseError { msg: format!("expected {:?}, found {:?}", kind, self.cur.kind), pos: self.cur.pos })
313        }
314    }
315    fn parse_program(&mut self) -> Result<Program, ParseError> {
316        let mut stmts = Vec::new();
317        while self.cur.kind != TokKind::Eof {
318            stmts.push(self.parse_stmt()?);
319        }
320        Ok(Program { stmts })
321    }
322    fn parse_stmt(&mut self) -> Result<Stmt, ParseError> {
323        match self.cur.kind {
324            TokKind::If => self.parse_if(),
325            TokKind::For => self.parse_for(),
326            TokKind::Ident => {
327                // Could be assignment or call as statement
328                let name_sym = self.interner.get_or_intern(self.cur.text.as_str());
329                self.bump()?;
330                if self.cur.kind == TokKind::Assign {
331                    self.bump()?;
332                    let expr = self.parse_expr()?;
333                    Ok(Stmt::Assign(name_sym, expr))
334                } else if self.cur.kind == TokKind::LParen {
335                    let call = self.finish_call(name_sym)?;
336                    Ok(Stmt::ExprStmt(call))
337                } else {
338                    Err(ParseError { msg: "expected '=' or '(' after identifier".into(), pos: self.cur.pos })
339                }
340            }
341            _ => {
342                // allow expr-stmt (e.g., foo(...))
343                let expr = self.parse_expr()?;
344                Ok(Stmt::ExprStmt(expr))
345            }
346        }
347    }
348    fn parse_if(&mut self) -> Result<Stmt, ParseError> {
349        self.expect(TokKind::If)?;
350        let cond = self.parse_expr()?;
351        let then_blk = self.parse_block()?;
352        let else_blk = if self.cur.kind == TokKind::Else {
353            self.bump()?;
354            self.parse_block()?
355        } else { Vec::new() };
356        Ok(Stmt::If { cond, then_blk, else_blk })
357    }
358    fn parse_for(&mut self) -> Result<Stmt, ParseError> {
359        self.expect(TokKind::For)?;
360        let var = if let TokKind::Ident = self.cur.kind {
361            let sym = self.interner.get_or_intern(self.cur.text.as_str());
362            self.bump()?;
363            sym
364        } else {
365            return Err(ParseError { msg: "expected loop variable".into(), pos: self.cur.pos });
366        };
367        self.expect(TokKind::In)?;
368        let start = self.parse_expr()?;
369        self.expect(TokKind::RangeDots)?;
370        let end = self.parse_expr()?;
371        let body = self.parse_block()?;
372        Ok(Stmt::For { var, start, end, body })
373    }
374    fn parse_block(&mut self) -> Result<Vec<Stmt>, ParseError> {
375        self.expect(TokKind::LBrace)?;
376        let mut stmts = Vec::new();
377        while self.cur.kind != TokKind::RBrace {
378            if self.cur.kind == TokKind::Eof {
379                return Err(ParseError { msg: "unterminated block".into(), pos: self.cur.pos });
380            }
381            stmts.push(self.parse_stmt()?);
382        }
383        self.expect(TokKind::RBrace)?;
384        Ok(stmts)
385    }
386    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
387        match self.cur.kind {
388            TokKind::Int => {
389                let s = self.cur.text.as_str();
390                let i = lexical::parse::<i64>(s.as_bytes()).unwrap();
391                self.bump()?;
392                Ok(Expr::Literal(Value::Int(i)))
393            }
394            TokKind::Float => {
395                let s = self.cur.text.as_str();
396                let f = lexical::parse::<f64>(s.as_bytes()).unwrap();
397                self.bump()?;
398                Ok(Expr::Literal(Value::Float(f)))
399            }
400            TokKind::Str => {
401                let s = self.cur.text.as_str();
402                let sym = self.interner.get_or_intern(s);
403                self.bump()?;
404                Ok(Expr::Literal(Value::Str(sym)))
405            }
406            TokKind::Ident => {
407                let name = self.interner.get_or_intern(self.cur.text.as_str());
408                self.bump()?;
409                if self.cur.kind == TokKind::LParen {
410                    self.finish_call(name)
411                } else {
412                    Ok(Expr::Var(name))
413                }
414            }
415            _ => Err(ParseError { msg: "expected expression".into(), pos: self.cur.pos }),
416        }
417    }
418    fn finish_call(&mut self, func: Symbol) -> Result<Expr, ParseError> {
419        self.expect(TokKind::LParen)?;
420        let mut args = Vec::new();
421        if self.cur.kind != TokKind::RParen {
422            loop {
423                args.push(self.parse_expr()?);
424                if self.cur.kind == TokKind::Comma {
425                    self.bump()?;
426                    continue;
427                }
428                break;
429            }
430        }
431        self.expect(TokKind::RParen)?;
432        Ok(Expr::Call(func, args))
433    }
434}
435
436//
437// Executor
438//
439impl<S> Interpreter<S> {
440    fn exec_block(&mut self, blk: &[Stmt], state: &mut S, env: &mut HashMap<Symbol, Value>) -> Result<(), RuntimeError> {
441        for stmt in blk { self.exec_stmt(stmt, state, env)?; }
442        Ok(())
443    }
444    fn exec_stmt(&mut self, stmt: &Stmt, state: &mut S, env: &mut HashMap<Symbol, Value>) -> Result<(), RuntimeError> {
445        match stmt {
446            Stmt::Assign(name, expr) => {
447                let v = self.eval_expr(expr, state, env)?;
448                env.insert(*name, v);
449                Ok(())
450            }
451            Stmt::ExprStmt(expr) => {
452                let _ = self.eval_expr(expr, state, env)?;
453                Ok(())
454            }
455            Stmt::If { cond, then_blk, else_blk } => {
456                let c = self.eval_expr(cond, state, env)?;
457                match c {
458                    Value::Bool(true) => self.exec_block(then_blk, state, env),
459                    Value::Bool(false) => self.exec_block(else_blk, state, env),
460                    _ => Err(RuntimeError { msg: "if condition must be bool".into() }),
461                }
462            }
463            Stmt::For { var, start, end, body } => {
464                let s = self.eval_expr(start, state, env)?;
465                let e = self.eval_expr(end, state, env)?;
466                let (Some(mut s), Some(e)) = (s.as_int(), e.as_int()) else {
467                    return Err(RuntimeError { msg: "for range must be integers".into() });
468                };
469                while s <= e {
470                    env.insert(*var, Value::Int(s));
471                    self.exec_block(body, state, env)?;
472                    s += 1;
473                }
474                Ok(())
475            }
476        }
477    }
478    fn eval_expr(&mut self, expr: &Expr, state: &mut S, env: &mut HashMap<Symbol, Value>) -> Result<Value, RuntimeError> {
479        match expr {
480            Expr::Literal(v) => Ok(v.clone()),
481            Expr::Var(sym) => {
482                env.get(sym).cloned().ok_or_else(|| RuntimeError { msg: format!("undefined variable '{}'", self.interner.resolve(*sym)) })
483            }
484            Expr::Call(func_sym, args_exprs) => {
485                let mut args = Vec::with_capacity(args_exprs.len());
486                for e in args_exprs { args.push(self.eval_expr(e, state, env)?); }
487                let func_name = self.interner.resolve(*func_sym);
488                if let Some(f) = self.builtins.get(func_name) {
489                    f(&args, state, &mut self.interner)
490                } else {
491                    Err(RuntimeError { msg: format!("unknown function '{}'", func_name) })
492                }
493            }
494        }
495    }
496}
497
498//
499// Doc-test example
500//
501#[cfg(test)]
502mod tests {
503    use std::time::Instant;
504
505    use super::*;
506    use phf::phf_map;
507
508    #[derive(Default)]
509    struct State {
510        out: String,
511    }
512
513    fn add(args: &[Value], _st: &mut State, _i: &mut Interner) -> Result<Value, RuntimeError> {
514        if args.len() != 2 { return Err(RuntimeError { msg: "add expects 2 args".into() }); }
515        match (&args[0], &args[1]) {
516            (Value::Int(a), Value::Int(b)) => Ok(Value::Int(a + b)),
517            (Value::Float(a), Value::Float(b)) => Ok(Value::Float(a + b)),
518            (Value::Int(a), Value::Float(b)) => Ok(Value::Float(*a as f64 + *b)),
519            (Value::Float(a), Value::Int(b)) => Ok(Value::Float(*a + *b as f64)),
520            _ => Err(RuntimeError { msg: "add expects numbers".into() }),
521        }
522    }
523
524    fn equals(args: &[Value], _st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
525        if args.len() != 2 { return Err(RuntimeError { msg: "equals expects 2 args".into() }); }
526        let eq = match (&args[0], &args[1]) {
527            (Value::Int(a), Value::Int(b)) => a == b,
528            (Value::Float(a), Value::Float(b)) => a == b,
529            (Value::Int(a), Value::Float(b)) => (*a as f64) == *b,
530            (Value::Float(a), Value::Int(b)) => *a == (*b as f64),
531            (Value::Bool(a), Value::Bool(b)) => a == b,
532            (Value::Str(sa), Value::Str(sb)) => i.resolve(*sa) == i.resolve(*sb),
533            _ => false,
534        };
535        Ok(Value::Bool(eq))
536    }
537
538    fn to_str(args: &[Value], _st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
539        if args.len() != 1 { return Err(RuntimeError { msg: "to_str expects 1 arg".into() }); }
540        let s = match &args[0] {
541            Value::Int(v) => v.to_string(),
542            Value::Float(v) => v.to_string(),
543            Value::Bool(v) => v.to_string(),
544            Value::Str(sym) => i.resolve(*sym).to_string(),
545            Value::Unit => "unit".to_string(),
546        };
547        Ok(Value::Str(i.get_or_intern(&s)))
548    }
549
550    fn concat(args: &[Value], _st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
551        if args.len() != 2 { return Err(RuntimeError { msg: "concat expects 2 args".into() }); }
552        let a = match &args[0] { Value::Str(s) => i.resolve(*s), _ => return Err(RuntimeError { msg: "concat arg 1 must be str".into() }) };
553        let b = match &args[1] { Value::Str(s) => i.resolve(*s), _ => return Err(RuntimeError { msg: "concat arg 2 must be str".into() }) };
554        let mut s = String::with_capacity(a.len() + b.len());
555        s.push_str(a); s.push_str(b);
556        Ok(Value::Str(i.get_or_intern(&s)))
557    }
558
559    fn print(args: &[Value], st: &mut State, i: &mut Interner) -> Result<Value, RuntimeError> {
560        for (idx, v) in args.iter().enumerate() {
561            if idx > 0 { st.out.push(' '); }
562            match v {
563                Value::Int(x) => st.out.push_str(&x.to_string()),
564                Value::Float(x) => st.out.push_str(&x.to_string()),
565                Value::Bool(x) => st.out.push_str(&x.to_string()),
566                Value::Str(sym) => st.out.push_str(i.resolve(*sym)),
567                Value::Unit => st.out.push_str("unit"),
568            }
569        }
570        st.out.push('\n');
571        Ok(Value::Unit)
572    }
573
574    static BUILTINS: phf::Map<&'static str, Builtin<State>> = phf_map! {
575        "add"    => add as Builtin<State>,
576        "equals" => equals as Builtin<State>,
577        "to_str" => to_str as Builtin<State>,
578        "concat" => concat as Builtin<State>,
579        "print"  => print as Builtin<State>,
580    };
581
582    #[test]
583    fn runs_sample() {
584        let code = r#"
585a = 0
586b = 1
587c = add(a, b)
588print(c)
589
590if equals(c, 1) {
591  print("one")
592} else {
593  print("not one")
594}
595
596sum = 0
597for i in 1 .. 5 {
598  sum = add(sum, i)
599}
600print(concat("sum = ", to_str(15)))
601"#;     
602        let a = Instant::now();
603        let mut interp = Interpreter::<State>::new(&BUILTINS);
604        let mut st = State::default();
605        interp.run(&mut st, code).unwrap();
606        assert!(st.out.contains("1"));
607        assert!(st.out.contains("one"));
608        assert!(st.out.contains("sum = 15"));
609        println!("this many microseconds have passed {}",a.elapsed().as_micros())
610    }
611}