zlang/lib.rs
1use std::{
2 any::Any,
3 cell::RefCell,
4 collections::HashMap,
5 rc::Rc,
6};
7
/// Payload carried by a [`ZType`] value.
///
/// Both variants hold an `Rc`, so cloning only bumps a reference count and
/// shares the underlying data.
#[derive(Clone, Debug)]
pub enum ZData {
    /// Text payload; this is what a quoted string literal evaluates to.
    Raw(Rc<str>),
    /// Arbitrary host value. Use `downcast_ref::<T>()` to get the concrete
    /// type back. `Debug` output shows only an opaque placeholder for it.
    Dyn(Rc<dyn Any>),
}

/// A tagged runtime value passed between scripts and host functions.
#[derive(Clone, Debug)]
pub struct ZType {
    /// Type tag. Kept as a string so that tags can be introduced dynamically.
    pub tag: Rc<str>,
    /// The value itself.
    pub data: ZData,
}
19
20type FnPtr<State> = fn(&mut State, &ZLang<State>, Vec<ZType>) -> Option<ZType>;
21
22pub struct ZLang<State> {
23 // Globals: variable slots and storage
24 var_slots: RefCell<HashMap<String, usize>>,
25 vars: RefCell<Vec<Option<ZType>>>,
26
27 // Function registry by name (compile-time resolution)
28 functions: HashMap<String, FnPtr<State>>,
29
30 // Common tag for raw literals
31 raw_tag: Rc<str>,
32}
33
/// A single lexical token; text-carrying variants borrow from the source
/// string.
#[derive(Clone, Debug)]
pub enum Tokens<'a> {
    /// Identifier: a variable name or a function name.
    Ident(&'a str),
    /// Contents of a double-quoted string literal, quotes excluded.
    Raw(&'a str),
    /// `=`
    Equals,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `,`
    Comma,
}
43
44enum Op<State> {
45 // Frame to collect only present argument values for a call
46 PushFrame,
47 // Discard everything pushed since the last frame (used for unknown functions)
48 DiscardFrame,
49
50 // Values and variables
51 LoadVar(usize),
52 LoadRaw(usize), // index into literal pool
53 StoreVar(usize),
54
55 // Calls
56 Call(FnPtr<State>),
57
58 // Statements
59 Pop, // discard top value if present
60}
61
62struct Program<State> {
63 ops: Vec<Op<State>>,
64 literals: Vec<Rc<str>>,
65}
66
67impl<State> ZLang<State> {
68 pub fn new() -> Self {
69 Self {
70 var_slots: RefCell::new(HashMap::new()),
71 vars: RefCell::new(Vec::new()),
72 functions: HashMap::new(),
73 raw_tag: Rc::<str>::from("raw"),
74 }
75 }
76
77 pub fn register_function(
78 &mut self,
79 name: impl Into<String>,
80 function: FnPtr<State>,
81 ) {
82 self.functions.insert(name.into(), function);
83 }
84
85 pub fn interpret(&self, state: &mut State, code: &str) {
86 let tokens = self.tokenize(code);
87 let prog = self.compile(&tokens);
88 self.execute(state, &prog);
89 }
90
91 // -----------------------------
92 // Tokenization (UTF-8 safe, no per-step bounds checks)
93 // -----------------------------
94 fn tokenize<'a>(&self, code: &'a str) -> Vec<Tokens<'a>> {
95 let mut toks = Vec::with_capacity(code.len() / 4);
96 let mut it = code.char_indices().peekable();
97 let mut paren_depth = 0usize;
98
99 while let Some(&(i, c)) = it.peek() {
100 match c {
101 ' ' | '\t' | '\r' | '\n' => {
102 it.next();
103 }
104 '=' => {
105 toks.push(Tokens::Equals);
106 it.next();
107 }
108 '(' => {
109 paren_depth = paren_depth.saturating_add(1);
110 toks.push(Tokens::LParen);
111 it.next();
112 }
113 ')' => {
114 if paren_depth > 0 {
115 paren_depth -= 1;
116 }
117 toks.push(Tokens::RParen);
118 it.next();
119 }
120 ',' => {
121 if paren_depth > 0 {
122 toks.push(Tokens::Comma);
123 }
124 it.next();
125 }
126 '"' => {
127 // string literal
128 let start = i + c.len_utf8();
129 it.next(); // consume the opening quote
130 let mut end = start;
131 loop {
132 match it.next() {
133 Some((j, ch)) if ch == '"' => {
134 end = j;
135 break;
136 }
137 Some((j, ch)) => {
138 end = j + ch.len_utf8();
139 }
140 None => {
141 end = code.len();
142 break;
143 }
144 }
145 }
146 toks.push(Tokens::Raw(&code[start..end]));
147 }
148 _ => {
149 if c == '_' || c.is_ascii_alphanumeric() {
150 // scan identifier
151 let start = i;
152 let mut end = i + c.len_utf8();
153 it.next(); // consume first char
154 while let Some(&(j, ch)) = it.peek() {
155 if ch == '_' || ch.is_ascii_alphanumeric() {
156 end = j + ch.len_utf8();
157 it.next();
158 } else {
159 break;
160 }
161 }
162 toks.push(Tokens::Ident(&code[start..end]));
163 } else {
164 // skip unknown
165 it.next();
166 }
167 }
168 }
169 }
170 toks
171 }
172
173
174 // -----------------------------
175 // Compile to bytecode-like ops (no AST kept)
176 // -----------------------------
177 fn compile<'a>(&'a self, toks: &'a [Tokens<'a>]) -> Program<State> {
178 let mut pos = 0usize;
179 let len = toks.len();
180 let mut ops: Vec<Op<State>> = Vec::with_capacity(len); // rough
181 let mut literals: Vec<Rc<str>> = Vec::new();
182
183 // Helpers capture
184 let mut literal_index = |s: &str| -> usize {
185 literals.push(Rc::<str>::from(s));
186 literals.len() - 1
187 };
188
189 // Parse a single expression and emit ops; returns true if it was a direct raw literal
190 fn compile_expr<State>(
191 lang: &ZLang<State>,
192 toks: &[Tokens<'_>],
193 pos: &mut usize,
194 ops: &mut Vec<Op<State>>,
195 literal_index: &mut impl FnMut(&str) -> usize,
196 ) -> bool {
197 let len = toks.len();
198 if *pos >= len {
199 return false;
200 }
201
202 match &toks[*pos] {
203 Tokens::Ident(name) => {
204 // lookahead for call or var
205 if *pos + 1 < len && matches!(toks[*pos + 1], Tokens::LParen) {
206 // function call: ident '(' args ')'
207 let ident = *name;
208 *pos += 2; // consume ident, '('
209 ops.push(Op::PushFrame);
210
211 // parse arguments: zero or more expr, separated by commas, until ')'
212 while *pos < len && !matches!(toks[*pos], Tokens::RParen) {
213 compile_expr(lang, toks, pos, ops, literal_index);
214 if *pos < len && matches!(toks[*pos], Tokens::Comma) {
215 *pos += 1; // consume comma
216 } else {
217 // either ')' or end or next token starts another expr
218 }
219 }
220 // expect ')'
221 if *pos < len && matches!(toks[*pos], Tokens::RParen) {
222 *pos += 1;
223 }
224
225 if let Some(&fp) = lang.functions.get(ident) {
226 ops.push(Op::Call(fp));
227 } else {
228 eprintln!("Unknown function `{}`", ident);
229 // Drop any argument results
230 ops.push(Op::DiscardFrame);
231 }
232 false
233 } else {
234 // variable ref
235 let slot = lang.get_or_create_slot(*name);
236 *pos += 1;
237 ops.push(Op::LoadVar(slot));
238 false
239 }
240 }
241 Tokens::Raw(s) => {
242 let idx = literal_index(s);
243 *pos += 1;
244 ops.push(Op::LoadRaw(idx));
245 true
246 }
247 _ => {
248 // skip token and treat as no-op expression
249 *pos += 1;
250 false
251 }
252 }
253 }
254
255 while pos < len {
256 match &toks[pos] {
257 Tokens::Ident(name) if pos + 1 < len && matches!(toks[pos + 1], Tokens::Equals) => {
258 // assignment: name '=' expr
259 let var_name = *name;
260 let slot = self.get_or_create_slot(var_name);
261 pos += 2; // consume name and '='
262
263 // Disallow direct raw literal assignment (compile-time check)
264 let starts_with_raw = matches!(toks.get(pos), Some(Tokens::Raw(_)));
265 let was_raw = compile_expr(self, toks, &mut pos, &mut ops, &mut literal_index);
266
267 if starts_with_raw && was_raw {
268 eprintln!(
269 "Error: cannot assign raw string directly to variable `{}`",
270 var_name
271 );
272 // discard computed value if any
273 ops.push(Op::Pop);
274 } else {
275 ops.push(Op::StoreVar(slot));
276 }
277 }
278 // expression statement
279 Tokens::Ident(_) | Tokens::Raw(_) | Tokens::LParen => {
280 let _ = compile_expr(self, toks, &mut pos, &mut ops, &mut literal_index);
281 // discard trailing value if present
282 ops.push(Op::Pop);
283 }
284 _ => {
285 pos += 1;
286 }
287 }
288 }
289
290 Program { ops, literals }
291 }
292
// -----------------------------
// Execution engine (value stack + call frames; every call allocates its argument Vec)
// -----------------------------
296 fn execute(&self, state: &mut State, prog: &Program<State>) {
297 let mut stack: Vec<ZType> = Vec::with_capacity(16);
298 let mut frames: Vec<usize> = Vec::with_capacity(8); // stores stack base indices
299 let mut arg_buf: Vec<ZType> = Vec::with_capacity(8); // reused argument buffer
300
301 for op in &prog.ops {
302 match op {
303 Op::PushFrame => {
304 frames.push(stack.len());
305 }
306 Op::DiscardFrame => {
307 if let Some(base) = frames.pop() {
308 stack.truncate(base);
309 }
310 }
311 Op::LoadVar(slot) => {
312 if let Some(val) = self.vars.borrow().get(*slot).and_then(|o| o.clone()) {
313 stack.push(val);
314 } // else: absent => no push (keeps Option semantics)
315 }
316 Op::LoadRaw(idx) => {
317 let s = prog.literals[*idx].clone();
318 stack.push(ZType {
319 tag: self.raw_tag.clone(),
320 data: ZData::Raw(s),
321 });
322 }
323 Op::StoreVar(slot) => {
324 if let Some(val) = stack.pop() {
325 self.set_slot(*slot, val);
326 } else {
327 eprintln!("Error: assignment has no value");
328 }
329 }
330 Op::Call(fp) => {
331 // collect args since last frame
332 let base = match frames.pop() {
333 Some(b) => b,
334 None => {
335 // malformed program; be defensive
336 arg_buf.clear();
337 if let Some(_v) = stack.pop() {
338 // drop one value if present
339 }
340 continue;
341 }
342 };
343 // drain in order
344 arg_buf.clear();
345 // Move values [base..] into arg_buf preserving order
346 while stack.len() > base {
347 // popping reverses; collect in temp then reverse or use remove
348 // For efficiency, swap-remove into a temp then reverse once
349 // Simpler: drain to a temp vec and extend
350 break;
351 }
352 // Efficient drain preserving order:
353 let mut tail: Vec<ZType> = stack.drain(base..).collect();
354 arg_buf.append(&mut tail);
355
356 if let Some(ret) = fp(state, self, std::mem::take(&mut arg_buf)) {
357 stack.push(ret);
358 }
359 // else: no push (Option semantics)
360 }
361 Op::Pop => {
362 let _ = stack.pop();
363 }
364 }
365 }
366 }
367
368 // Slot management: map name -> slot, ensure storage exists
369 fn get_or_create_slot(&self, name: &str) -> usize {
370 // Fast path: check without mut borrow
371 if let Some(&slot) = self.var_slots.borrow().get(name) {
372 return slot;
373 }
374 // Create
375 let mut slots = self.var_slots.borrow_mut();
376 if let Some(&slot) = slots.get(name) {
377 return slot;
378 }
379 let mut vars = self.vars.borrow_mut();
380 let slot = vars.len();
381 vars.push(None);
382 slots.insert(name.to_owned(), slot);
383 slot
384 }
385
386 fn set_slot(&self, slot: usize, val: ZType) {
387 let mut vars = self.vars.borrow_mut();
388 if slot >= vars.len() {
389 vars.resize(slot + 1, None);
390 }
391 vars[slot] = Some(val);
392 }
393}