zlang 0.1.1

A simple embeddable language implemented for Rust. It is called zlang because it is the last embedded language a sane developer would reach for.
Documentation
use std::{
    any::Any,
    cell::RefCell,
    collections::HashMap,
    rc::Rc,
};

/// Payload carried by a [`ZType`] value.
///
/// Both variants hold an `Rc`, so cloning a payload only bumps a reference count.
#[derive(Clone, Debug)]
pub enum ZData {
    /// Shared string text; this is the payload the interpreter builds for string literals.
    Raw(Rc<str>),
    /// Type-erased host value; recover the concrete type with `downcast_ref`.
    Dyn(Rc<dyn Any>),
}

/// A runtime value of the language: a type tag plus its payload.
///
/// `Debug` is derived so host code can log or `assert!` on values; a `Dyn` payload
/// prints opaquely because `dyn Any` does not expose its contents.
#[derive(Clone, Debug)]
pub struct ZType {
    /// Name of the value's type, chosen by whoever constructs the value.
    pub tag: Rc<str>,  // dynamic tags remain supported
    /// The value itself.
    pub data: ZData,
}

/// Signature of a host function callable from scripts.
///
/// It receives the mutable host state, the interpreter itself, and the argument values
/// that were present at the call site (in source order). Returning `None` means the call
/// produces no value.
type FnPtr<State> = fn(&mut State, &ZLang<State>, Vec<ZType>) -> Option<ZType>;

/// The interpreter: global variable storage plus the table of registered host functions.
///
/// Variable storage sits behind `RefCell` so it can be updated through `&self`.
pub struct ZLang<State> {
    // Globals: variable slots and storage
    /// Maps a variable name to its index in `vars`.
    var_slots: RefCell<HashMap<String, usize>>,
    /// Slot storage; `None` marks a variable that has a slot but no value yet.
    vars: RefCell<Vec<Option<ZType>>>,

    // Function registry by name (compile-time resolution)
    functions: HashMap<String, FnPtr<State>>,

    // Common tag for raw literals
    raw_tag: Rc<str>,
}

/// Lexical tokens produced by the tokenizer; string payloads borrow from the source text.
#[derive(Clone, Debug)]
pub enum Tokens<'a> {
    /// Identifier made of ASCII letters, ASCII digits and `_`.
    Ident(&'a str),
    /// Contents of a double-quoted string literal, without the quotes.
    Raw(&'a str),
    /// `=`
    Equals,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,` (the tokenizer only emits it while inside parentheses)
    Comma,
}

/// One instruction of the flat program executed by the stack machine.
enum Op<State> {
    // Frame to collect only present argument values for a call
    PushFrame,
    // Discard everything pushed since the last frame (used for unknown functions)
    DiscardFrame,

    // Values and variables
    // Pushes the variable's value if it has one; pushes nothing otherwise.
    LoadVar(usize),
    LoadRaw(usize), // index into literal pool
    // Pops the top value into the variable slot.
    StoreVar(usize),

    // Calls
    // Invokes the host function with every value pushed since the matching `PushFrame`.
    Call(FnPtr<State>),

    // Statements
    Pop,            // discard top value if present
}

/// Compiled form of a script: a linear op sequence plus the string literals it references.
struct Program<State> {
    /// Instructions, executed front to back.
    ops: Vec<Op<State>>,
    /// Literal pool indexed by `Op::LoadRaw`.
    literals: Vec<Rc<str>>,
}

impl<State> ZLang<State> {
    /// Creates an interpreter with no variables and no registered functions.
    pub fn new() -> Self {
        // Tag shared by every string-literal value.
        let raw_tag: Rc<str> = Rc::from("raw");
        Self {
            var_slots: RefCell::default(),
            vars: RefCell::default(),
            functions: HashMap::default(),
            raw_tag,
        }
    }

    /// Makes `function` callable from scripts under `name`.
    ///
    /// Registering the same name again replaces the earlier function.
    pub fn register_function(
        &mut self,
        name: impl Into<String>,
        function: FnPtr<State>,
    ) {
        let key: String = name.into();
        let _previous = self.functions.insert(key, function);
    }

    /// Runs `code` against `state`: tokenize, compile to ops, then execute.
    ///
    /// Problems found along the way are reported on stderr; nothing is returned.
    pub fn interpret(&self, state: &mut State, code: &str) {
        let program = {
            let token_stream = self.tokenize(code);
            self.compile(&token_stream)
        };
        self.execute(state, &program);
    }

    // -----------------------------
    // Tokenization (UTF-8 safe: the cursor only ever rests on char boundaries)
    // -----------------------------
    /// Splits `code` into tokens that borrow from it.
    ///
    /// * Whitespace and unrecognized characters are skipped.
    /// * A comma produces a token only while inside parentheses.
    /// * A string literal runs to the next `"`; there are no escapes, and an
    ///   unterminated literal extends to the end of the input.
    fn tokenize<'a>(&self, code: &'a str) -> Vec<Tokens<'a>> {
        fn is_ident_char(ch: char) -> bool {
            ch == '_' || ch.is_ascii_alphanumeric()
        }

        let mut out = Vec::with_capacity(code.len() / 4);
        let mut depth = 0usize;
        // Byte offset of the next unread character.
        let mut cursor = 0usize;

        while let Some(ch) = code[cursor..].chars().next() {
            let after = cursor + ch.len_utf8();
            match ch {
                ' ' | '\t' | '\r' | '\n' => cursor = after,
                '=' => {
                    out.push(Tokens::Equals);
                    cursor = after;
                }
                '(' => {
                    depth = depth.saturating_add(1);
                    out.push(Tokens::LParen);
                    cursor = after;
                }
                ')' => {
                    // An unmatched ')' leaves the depth at zero.
                    depth = depth.saturating_sub(1);
                    out.push(Tokens::RParen);
                    cursor = after;
                }
                ',' => {
                    if depth > 0 {
                        out.push(Tokens::Comma);
                    }
                    cursor = after;
                }
                '"' => {
                    let body = &code[after..];
                    match body.find('"') {
                        Some(len) => {
                            out.push(Tokens::Raw(&body[..len]));
                            // Step over the literal and its closing quote (one byte).
                            cursor = after + len + 1;
                        }
                        None => {
                            out.push(Tokens::Raw(body));
                            cursor = code.len();
                        }
                    }
                }
                first if is_ident_char(first) => {
                    let rest = &code[cursor..];
                    let len = rest
                        .find(|c: char| !is_ident_char(c))
                        .unwrap_or(rest.len());
                    out.push(Tokens::Ident(&rest[..len]));
                    cursor += len;
                }
                _ => cursor = after,
            }
        }
        out
    }


    // -----------------------------
    // Compile to bytecode-like ops (no AST kept)
    // -----------------------------
    /// Translates `toks` into a flat [`Program`].
    ///
    /// Statements are either `name = expr` or a bare expression whose value is discarded.
    /// Function names are resolved here; an unknown function is reported on stderr and
    /// compiled so that its arguments are evaluated and then dropped. Assigning a string
    /// literal directly to a variable is rejected with a message on stderr.
    fn compile<'a>(&'a self, toks: &'a [Tokens<'a>]) -> Program<State> {
        // Emits the ops for one expression starting at `toks[*cursor]` and moves `cursor`
        // past it. Returns true only when the expression was a single string literal.
        fn emit_expr<State>(
            lang: &ZLang<State>,
            toks: &[Tokens<'_>],
            cursor: &mut usize,
            ops: &mut Vec<Op<State>>,
            pool: &mut Vec<Rc<str>>,
        ) -> bool {
            let tok = match toks.get(*cursor) {
                Some(tok) => tok,
                None => return false,
            };

            match tok {
                Tokens::Raw(text) => {
                    pool.push(Rc::<str>::from(*text));
                    ops.push(Op::LoadRaw(pool.len() - 1));
                    *cursor += 1;
                    true
                }
                // Call: ident '(' args ')'
                Tokens::Ident(callee)
                    if matches!(toks.get(*cursor + 1), Some(Tokens::LParen)) =>
                {
                    *cursor += 2; // identifier and '('
                    ops.push(Op::PushFrame);

                    // Arguments run until ')' or the end of input; commas are optional.
                    loop {
                        match toks.get(*cursor) {
                            None => break,
                            Some(Tokens::RParen) => {
                                *cursor += 1;
                                break;
                            }
                            Some(_) => {
                                emit_expr(lang, toks, cursor, ops, pool);
                                if matches!(toks.get(*cursor), Some(Tokens::Comma)) {
                                    *cursor += 1;
                                }
                            }
                        }
                    }

                    match lang.functions.get(*callee) {
                        Some(&fp) => ops.push(Op::Call(fp)),
                        None => {
                            eprintln!("Unknown function `{}`", callee);
                            // The argument values have nowhere to go; drop them at run time.
                            ops.push(Op::DiscardFrame);
                        }
                    }
                    false
                }
                // Plain variable reference
                Tokens::Ident(var) => {
                    let slot = lang.get_or_create_slot(var);
                    ops.push(Op::LoadVar(slot));
                    *cursor += 1;
                    false
                }
                // Anything else is skipped and contributes no ops.
                Tokens::Equals | Tokens::LParen | Tokens::RParen | Tokens::Comma => {
                    *cursor += 1;
                    false
                }
            }
        }

        let mut ops: Vec<Op<State>> = Vec::with_capacity(toks.len());
        let mut pool: Vec<Rc<str>> = Vec::new();
        let mut cursor = 0usize;

        while let Some(tok) = toks.get(cursor) {
            match tok {
                // Assignment: name '=' expr
                Tokens::Ident(target)
                    if matches!(toks.get(cursor + 1), Some(Tokens::Equals)) =>
                {
                    let slot = self.get_or_create_slot(target);
                    cursor += 2; // name and '='

                    let rhs_is_literal = emit_expr(self, toks, &mut cursor, &mut ops, &mut pool);
                    if rhs_is_literal {
                        eprintln!(
                            "Error: cannot assign raw string directly to variable `{}`",
                            target
                        );
                        // The literal was still loaded; throw it away.
                        ops.push(Op::Pop);
                    } else {
                        ops.push(Op::StoreVar(slot));
                    }
                }
                // Expression statement: evaluate, then drop whatever it left behind.
                Tokens::Ident(_) | Tokens::Raw(_) | Tokens::LParen => {
                    emit_expr(self, toks, &mut cursor, &mut ops, &mut pool);
                    ops.push(Op::Pop);
                }
                // Stray punctuation between statements.
                Tokens::Equals | Tokens::RParen | Tokens::Comma => cursor += 1,
            }
        }

        Program {
            ops,
            literals: pool,
        }
    }

    // -----------------------------
    // Execution engine (value stack + call frames)
    // -----------------------------
    /// Runs `prog` on a fresh value stack.
    ///
    /// A frame records the stack height at the start of a call's arguments, so the call
    /// receives exactly the values pushed since then. Absent values (an unset variable, a
    /// function returning `None`) push nothing. Runtime problems are reported on stderr.
    ///
    /// No `RefCell` borrow is held while a host function runs, so the function may use
    /// the interpreter it is handed.
    fn execute(&self, state: &mut State, prog: &Program<State>) {
        let mut stack: Vec<ZType> = Vec::with_capacity(16);
        let mut frames: Vec<usize> = Vec::with_capacity(8); // stack heights at frame start

        for op in &prog.ops {
            match op {
                Op::PushFrame => frames.push(stack.len()),
                Op::DiscardFrame => {
                    if let Some(base) = frames.pop() {
                        stack.truncate(base);
                    }
                }
                Op::LoadVar(slot) => {
                    // Copy the value out so the borrow of `vars` ends on this line.
                    let loaded = self.vars.borrow().get(*slot).and_then(|o| o.clone());
                    if let Some(val) = loaded {
                        stack.push(val);
                    } // else: absent => no push (keeps Option semantics)
                }
                Op::LoadRaw(idx) => {
                    // `compile` only emits indices it has just added to the pool.
                    let text = prog.literals[*idx].clone();
                    stack.push(ZType {
                        tag: self.raw_tag.clone(),
                        data: ZData::Raw(text),
                    });
                }
                Op::StoreVar(slot) => match stack.pop() {
                    Some(val) => self.set_slot(*slot, val),
                    None => eprintln!("Error: assignment has no value"),
                },
                Op::Call(fp) => {
                    let base = match frames.pop() {
                        Some(base) => base,
                        None => {
                            // Malformed program (call without a frame): drop one value
                            // if present and carry on.
                            let _ = stack.pop();
                            continue;
                        }
                    };
                    // Move the arguments out in push order. The callee takes ownership of
                    // the Vec, so it is built directly instead of via a scratch buffer.
                    let args: Vec<ZType> = stack.drain(base..).collect();

                    if let Some(ret) = fp(state, self, args) {
                        stack.push(ret);
                    }
                    // else: no push (Option semantics)
                }
                Op::Pop => {
                    let _ = stack.pop();
                }
            }
        }
    }

    // Slot management: map name -> slot, ensure storage exists
    /// Returns the slot index for `name`, allocating an empty slot on first use.
    fn get_or_create_slot(&self, name: &str) -> usize {
        // Look up under a shared borrow that ends with this statement.
        let known = self.var_slots.borrow().get(name).copied();
        if let Some(slot) = known {
            return slot;
        }

        // First sighting: append an empty slot and remember its index.
        let mut storage = self.vars.borrow_mut();
        let fresh = storage.len();
        storage.push(None);
        self.var_slots.borrow_mut().insert(name.to_owned(), fresh);
        fresh
    }

    /// Stores `val` in `slot`, growing the storage with empty slots if it is too short.
    fn set_slot(&self, slot: usize, val: ZType) {
        let mut storage = self.vars.borrow_mut();
        match storage.get_mut(slot) {
            Some(cell) => *cell = Some(val),
            None => {
                storage.resize(slot + 1, None);
                storage[slot] = Some(val);
            }
        }
    }
}