qala_compiler/vm.rs
1//! the stack-based bytecode interpreter.
2//!
3//! a [`Vm`] takes a [`Program`] (the chunks, the constant pools, the source
4//! map) and executes it: a value stack of NaN-boxed [`Value`]s, a call-frame
5//! stack, a heap of [`HeapObject`]s, a console buffer, a leak log, and
6//! persistent globals for the REPL. `run()` drives it to completion or to the
7//! first runtime fault; `step()` advances one instruction for the playground's
8//! step-through; both go through the single `dispatch_one()` decoder.
9//!
10//! the absolute constraint: the VM NEVER panics on any bytecode. every
11//! `code[ip]` byte, every operand read, every value-stack pop, every heap
12//! access, every constant-pool index is bounds-checked and surfaces a
13//! [`QalaError::Runtime`] on failure. the crate compiles to WASM, where a
14//! panic aborts the browser tab. there is no `unwrap` outside `#[cfg(test)]`.
15//!
16//! memory model: an `i64` is uniformly heap-boxed -- a [`HeapObject::Int`]
17//! reached through a `TAG_PTR` [`Value`] -- so the codec in `value.rs` carries
18//! no integer encoding (the uniform-heap-box decision; see 05-RESEARCH.md). a
19//! function value, by contrast, is a tagged scalar: [`Value::function`] rides
20//! the `u16` fn-id in the NaN payload with NO heap object. arrays, strings,
21//! structs, enum variants, and file handles are heap objects.
22//!
23//! the heap reclaims slots with reference counting plus a free list.
24//! [`Heap::dec`] returns the freed [`HeapObject`] when a refcount hits zero so
25//! the caller can inspect it (the stdlib's file-handle leak check needs this).
26//! reference cycles are NOT collected in v1 -- a v2 concern; Qala v1's value
27//! semantics make cycles hard to create.
28//!
29//! this module is built up over several commits (the authorized
30//! multi-commit-to-one-file exception): the data model and heap here, then the
31//! dispatch decoder, then the arithmetic / comparison / logic / jump opcode
32//! handlers. CALL / RETURN / the `MAKE_*` family / INDEX / FIELD / LEN / TO_STR
33//! / CONCAT_N / MATCH_VARIANT are stubbed as a clean `Runtime` error until a
34//! later commit fills them in.
35
36use crate::chunk::Chunk;
37use crate::chunk::Program;
38use crate::errors::QalaError;
39use crate::opcode::{Opcode, STDLIB_FN_BASE};
40use crate::span::LineIndex;
41use crate::span::Span;
42use crate::value::ConstValue;
43use crate::value::Value;
44
/// upper bound on the number of live call frames (1024).
///
/// the VM is a `while`-loop interpreter, so a Qala recursion deepens
/// [`Vm::frames`] and never the host Rust stack. unbounded recursion therefore
/// ends at this cap as a clean `Runtime` "stack overflow" rather than a host
/// stack overflow. 1024 frames is generous for a teaching language.
///
/// [`Vm::op_call`] and [`Vm::call_function_value`] enforce it: each checks
/// `frames.len() >= MAX_FRAMES` before pushing a frame.
const MAX_FRAMES: usize = 1 << 10;

/// upper bound on value-stack depth (65536 slots).
///
/// [`Vm::push`] fails with "value stack overflow" once the stack holds this
/// many values, so a runaway program cannot exhaust WASM memory through the
/// value stack. far more than any realistic Qala program needs.
const MAX_STACK: usize = 1 << 16;

/// upper bound on heap slab size (one million slots).
///
/// [`Heap::alloc`] refuses to grow the slab past this, so a runaway allocation
/// loop surfaces a clean `Runtime` "heap exhausted" error instead of an
/// out-of-memory abort of the browser tab. this is the WASM memory-exhaustion
/// guard; raising it is a v2 concern.
const MAX_HEAP: usize = 1_000_000;

/// deepest nesting [`Vm::value_to_string`] and [`Vm::runtime_type_name`] will
/// descend into.
///
/// past this depth the helpers emit `"<...>"` (display) or `"..."` (type name)
/// instead of recursing further, so a pathologically deep user value (an array
/// of arrays of ... of arrays) cannot overflow the Rust/WASM call stack and
/// trap the host.
const MAX_DISPLAY_DEPTH: u32 = 64;
72
73/// a value living on the VM heap, reached through a `TAG_PTR` [`Value`].
74///
75/// every variant a `MAKE_*` opcode builds, plus [`HeapObject::Int`] -- because
76/// `i64` is uniformly heap-boxed (the codec in `value.rs` has no integer tag).
77/// a [`HeapObject::FileHandle`] that is freed while still open is a resource
78/// leak the VM logs.
79///
80/// derives `Clone, PartialEq` but NOT `Debug` -- a variant carries [`Value`],
81/// and `Value`'s locked derive list (the NaN-box newtype) omits `Debug`. tests
82/// compare `HeapObject`s with `==` rather than `assert_eq!` for that reason.
83#[derive(Clone, PartialEq)]
84pub enum HeapObject {
85 /// a 64-bit signed integer. `i64` has no NaN-box tag, so every integer
86 /// runtime value is one of these, reached through a pointer.
87 Int(i64),
88 /// a dynamic array of values, built by `MAKE_ARRAY`. `push` / `pop` mutate
89 /// it; `INDEX` reads an element; `LEN` reports the element count.
90 Array(Vec<Value>),
91 /// a fixed-shape tuple of values, built by `MAKE_TUPLE`. distinct from
92 /// [`HeapObject::Array`] so the stdlib's `type_of` (plan 05-05) can render
93 /// a tuple's structural type `(i64, str)` rather than an array type. `INDEX`
94 /// reads an element exactly as it does for an array.
95 Tuple(Vec<Value>),
96 /// an owned string, built by a `CONST` of a `ConstValue::Str` or by
97 /// `CONCAT_N` / `TO_STR`.
98 Str(String),
99 /// a struct instance: the declared type name (from `Program.structs`) plus
100 /// the field values in declaration order.
101 Struct {
102 /// the declared struct name, e.g. `"Point"` -- what `type_of` returns.
103 type_name: String,
104 /// the field values, in the struct's declaration order.
105 fields: Vec<Value>,
106 },
107 /// an enum-variant instance: the enum name, the variant name, and the
108 /// variant's payload values (empty for a payload-less variant).
109 EnumVariant {
110 /// the enum's declared name, e.g. `"Shape"`.
111 type_name: String,
112 /// the variant's name, e.g. `"Circle"`.
113 variant: String,
114 /// the variant's payload values, in declaration order.
115 payload: Vec<Value>,
116 },
117 /// a mock file handle -- `open` builds one, `close` marks it closed. the
118 /// VM does no real file I/O (it runs in a WASM sandbox); the handle backs
119 /// the effect system and `defer close(f)` demonstrations. a handle freed
120 /// while still open is a leak.
121 FileHandle {
122 /// the path passed to `open`. purely informational in the mock.
123 path: String,
124 /// the mock file content; `read_all` returns this.
125 content: String,
126 /// `true` once `close` has run on this handle.
127 closed: bool,
128 },
129}
130
131/// one heap slot: a [`HeapObject`] plus its reference count.
132///
133/// the count starts at 1 on `alloc`, rises on `inc`, falls on `dec`; a slot
134/// whose count reaches zero is freed back onto the heap's free list.
135#[derive(Clone)]
136struct HeapSlot {
137 /// the object this slot holds.
138 object: HeapObject,
139 /// the live-reference count. zero means the slot is free.
140 refcount: u32,
141}
142
143/// the VM heap: a slab of [`HeapSlot`]s plus a free list of reusable indices.
144///
145/// `alloc` reuses a freed slot when one is available, else appends. each slot
146/// is reference-counted; a slot whose count hits zero is pushed onto `free`.
147/// every accessor takes a slot index and returns an `Option` -- a bad index is
148/// never an out-of-bounds panic.
149///
150/// reference cycles are NOT collected: two objects that point at each other
151/// keep each other's refcount above zero forever. this is a documented v1
152/// limitation -- a cycle collector is a v2 concern, and Qala v1's value
153/// semantics (no mutable cross-object references) make cycles hard to create.
154#[derive(Clone, Default)]
155pub struct Heap {
156 /// the slot slab, indexed by the `u32` a pointer [`Value`] carries.
157 objects: Vec<HeapSlot>,
158 /// indices of freed slots, reused before the slab grows.
159 free: Vec<u32>,
160}
161
162impl Heap {
163 /// construct an empty heap.
164 pub fn new() -> Self {
165 Self::default()
166 }
167
168 /// allocate `obj` into a slot and return the slot index. a freed slot is
169 /// reused if one is available, else the slab grows by one. the new slot's
170 /// refcount starts at 1.
171 ///
172 /// returns `None` when the slab is already at [`MAX_HEAP`] slots and no
173 /// free slot can be reused -- the caller maps `None` to a `Runtime` "heap
174 /// exhausted" error so a runaway allocation never aborts the host. `None`
175 /// rather than `Result<u32, ()>` keeps the signature lint-clean (a unit
176 /// error type carries no information `None` does not).
177 pub fn alloc(&mut self, obj: HeapObject) -> Option<u32> {
178 if let Some(slot) = self.free.pop() {
179 let idx = slot as usize;
180 // a freed slot is always within bounds: it was a valid index when
181 // dec pushed it onto the free list.
182 if let Some(s) = self.objects.get_mut(idx) {
183 s.object = obj;
184 s.refcount = 1;
185 return Some(slot);
186 }
187 // the free-list index does not exist in the slab -- this violates the
188 // invariant that only valid, previously-freed indices are pushed onto
189 // the free list. do not silently drop the slot: push it back so the
190 // slab stays consistent, then fall through to the append path.
191 debug_assert!(
192 false,
193 "heap free-list contained out-of-range index {slot}; slab len={}",
194 self.objects.len()
195 );
196 self.free.push(slot);
197 }
198 if self.objects.len() >= MAX_HEAP {
199 return None;
200 }
201 let idx = self.objects.len() as u32;
202 self.objects.push(HeapSlot {
203 object: obj,
204 refcount: 1,
205 });
206 Some(idx)
207 }
208
209 /// borrow the object at `slot`. returns `None` for an out-of-range or
210 /// freed slot (a freed slot has refcount 0) -- never an out-of-bounds
211 /// index.
212 pub fn get(&self, slot: u32) -> Option<&HeapObject> {
213 self.objects
214 .get(slot as usize)
215 .filter(|s| s.refcount > 0)
216 .map(|s| &s.object)
217 }
218
219 /// mutably borrow the object at `slot`. returns `None` for an out-of-range
220 /// or freed slot -- never an out-of-bounds index.
221 pub fn get_mut(&mut self, slot: u32) -> Option<&mut HeapObject> {
222 self.objects
223 .get_mut(slot as usize)
224 .filter(|s| s.refcount > 0)
225 .map(|s| &mut s.object)
226 }
227
228 /// increment the refcount of the object at `slot`. a bad or freed slot is
229 /// a silent no-op -- the VM never crashes on a stray inc.
230 ///
231 /// **v1 aliasing invariant**: v1 does NOT alias heap values. every heap
232 /// object has exactly one logical owner; values are moved (or copied as
233 /// tagged scalars) rather than duplicated with shared ownership. as a
234 /// consequence, `inc` is currently unused -- the opcode handlers that
235 /// could in principle produce a second reference to the same slot
236 /// (`DUP` of a pointer, `GET_LOCAL`, `GET_GLOBAL`, `MAKE_ARRAY`,
237 /// `MAKE_TUPLE`, `MAKE_STRUCT`, `MAKE_ENUM_VARIANT`) do not call `inc`
238 /// today. before any v2 work allows genuine aliasing, every one of those
239 /// sites must be audited and wired to `inc` first, or use-after-free
240 /// becomes possible. the `#[allow(dead_code)]` keeps this function present
241 /// as the documented extension point without triggering a compiler warning.
242 #[allow(dead_code)]
243 pub fn inc(&mut self, slot: u32) {
244 if let Some(s) = self
245 .objects
246 .get_mut(slot as usize)
247 .filter(|s| s.refcount > 0)
248 {
249 s.refcount = s.refcount.saturating_add(1);
250 }
251 }
252
253 /// decrement the refcount of the object at `slot`.
254 ///
255 /// when the count reaches zero the slot is freed: its index is pushed onto
256 /// the free list and the freed [`HeapObject`] is RETURNED so the caller can
257 /// inspect it. when the count is still positive after the decrement -- or
258 /// the slot is out of range or already free -- the result is `None`.
259 ///
260 /// the return value is load-bearing: the file-handle leak check in
261 /// [`Vm::check_frame_handle_leaks`] needs `dec` to hand back the freed
262 /// object so the [`Vm`] can detect a still-open [`HeapObject::FileHandle`]
263 /// and push to [`Vm::leak_log`]. the leak-log push is the caller's
264 /// responsibility -- `Heap` has no access to `Vm::leak_log`; `dec` only
265 /// surfaces the freed object. this signature is the locked contract.
266 pub fn dec(&mut self, slot: u32) -> Option<HeapObject> {
267 let s = self.objects.get_mut(slot as usize)?;
268 if s.refcount == 0 {
269 return None;
270 }
271 s.refcount -= 1;
272 if s.refcount == 0 {
273 // free the slot: hand back the object and recycle the index. the
274 // slot keeps a placeholder (a void Int) so the slab stays dense;
275 // the next alloc that reuses this index overwrites it.
276 let freed = std::mem::replace(&mut s.object, HeapObject::Int(0));
277 self.free.push(slot);
278 Some(freed)
279 } else {
280 None
281 }
282 }
283}
284
285/// one call frame: the function being run, the instruction pointer into its
286/// chunk, the base slot where this frame's locals begin, and the frame's local
287/// slots.
288///
289/// the frame owns its `locals` vec, which drops when the frame is popped on
290/// `RETURN`. 05-RESEARCH.md describes a per-frame bump arena freed wholesale on
291/// return; in v1 the heap's free list already reclaims slots, so the frame
292/// owning its `locals` (dropped on return) is the whole of the "arena" -- a
293/// distinct bump region is not required for correctness, and adding one would
294/// be dead weight against the free list. a richer arena stays a v2 idea.
295///
296/// derives `Clone` but not `Debug` -- `locals` is a `Vec<Value>` and `Value`'s
297/// locked derive list omits `Debug`.
298#[derive(Clone)]
299pub struct CallFrame {
300 /// index into [`Program::chunks`] -- which function this frame runs.
301 pub chunk_idx: usize,
302 /// the instruction pointer: a byte offset into the chunk's `code`.
303 pub ip: usize,
304 /// the value-stack index where this frame's `locals[0]` conceptually sits.
305 /// the call machinery (a later commit) uses it to unwind the stack on
306 /// `RETURN`.
307 pub base: usize,
308 /// this frame's local slots, indexed by `GET_LOCAL` / `SET_LOCAL`
309 /// operands. dropped when the frame is popped.
310 pub locals: Vec<Value>,
311}
312
313/// one typed value in a [`VmState`] snapshot: its display string and its
314/// runtime type name.
315///
316/// the playground type-tints a stack slot or a variable by `type_name` (so an
317/// `i64` gets one colour, a `str` another) and shows `rendered` as the value.
318/// `rendered` comes from [`Vm::value_to_string`]; `type_name` from
319/// [`Vm::runtime_type_name`].
320///
321/// derives `serde::Serialize` so Phase 6's WASM bridge can hand the snapshot
322/// straight to JavaScript -- the same precedent `diagnostics.rs`'s
323/// `MonacoDiagnostic` set.
324#[derive(Debug, Clone, serde::Serialize)]
325pub struct StateValue {
326 /// the value's display string, e.g. `42`, `true`, `[1, 2, 3]`.
327 pub rendered: String,
328 /// the value's runtime type name, e.g. `i64`, `str`, `[i64]`, `Shape`.
329 pub type_name: String,
330}
331
332/// one in-scope variable in a [`VmState`] snapshot: a name plus its typed
333/// value.
334///
335/// `name` is the variable's real source name -- `x`, `sum`, a `for` loop
336/// variable -- recovered from the chunk's [`Chunk::local_names`] table; a
337/// compiler-synthesized temporary with no recorded name falls back to
338/// `slot{i}`. `value` carries the same rendered-string + type-name pair a
339/// stack slot does. derives `serde::Serialize` for the WASM bridge.
340#[derive(Debug, Clone, serde::Serialize)]
341pub struct NamedValue {
342 /// the variable's source name (or `slot{i}` for an unnamed temporary).
343 pub name: String,
344 /// the variable's current value, rendered and type-tagged.
345 pub value: StateValue,
346}
347
348/// the playground's step-through snapshot of the VM.
349///
350/// [`Vm::get_state`] builds one of these. it carries everything the
351/// playground's panels render after each instruction: the current chunk index
352/// and instruction pointer (the bytecode panel's highlight), the value stack
353/// (the animated stack panel), the current frame's in-scope variables (the
354/// variables panel), the accumulated console output (the console panel), and
355/// the resource-leak log.
356///
357/// a plain data struct -- no behaviour. derives `serde::Serialize` so Phase
358/// 6's WASM bridge serializes it for JavaScript without a conversion layer.
359#[derive(Debug, Clone, serde::Serialize)]
360pub struct VmState {
361 /// the index into [`Program::chunks`] of the function currently running.
362 pub chunk_index: usize,
363 /// the instruction pointer: a byte offset into that chunk's `code`.
364 pub ip: usize,
365 /// the 1-based source line of the instruction at `ip`, for the editor's
366 /// current-line highlight. `0` means no line -- a synthesized instruction
367 /// or an out-of-range `ip`; the playground highlights nothing then.
368 pub current_line: usize,
369 /// the value stack bottom-to-top, each slot rendered and type-tagged.
370 pub stack: Vec<StateValue>,
371 /// the current frame's in-scope local variables, name + typed value.
372 pub variables: Vec<NamedValue>,
373 /// the accumulated `print` / `println` output, one entry per write.
374 pub console: Vec<String>,
375 /// the resource-leak log: a file handle freed while still open.
376 pub leak_log: Vec<String>,
377}
378
379/// the bytecode virtual machine.
380///
381/// holds the program being run, the value stack, the call-frame stack, the
382/// heap, the console buffer, the leak log, the persistent globals, and the
383/// original source text (needed to build a line-covering [`Span`] for a
384/// runtime error and for the REPL pipeline).
385pub struct Vm {
386 /// the program being executed.
387 program: Program,
388 /// the value stack -- every slot a NaN-boxed [`Value`]. capped at
389 /// [`MAX_STACK`].
390 stack: Vec<Value>,
391 /// the call-frame stack. capped at [`MAX_FRAMES`]; the topmost frame is
392 /// the one currently executing.
393 frames: Vec<CallFrame>,
394 /// the heap. `pub(crate)` so `crate::stdlib` allocates result objects
395 /// (`push`, `pop`, `open`, `map`, ...) and reads argument objects through
396 /// it -- the native functions need the same heap the opcode handlers use.
397 pub(crate) heap: Heap,
398 /// captured `print` / `println` output. the VM runs in WASM where stdout
399 /// is invisible; the playground renders this buffer instead.
400 ///
401 /// read by [`Vm::get_state`] for the snapshot; written by `crate::stdlib`'s
402 /// `print` / `println`. `pub(crate)` so those native functions append here.
403 pub(crate) console: Vec<String>,
404 /// resource-leak log: a [`HeapObject::FileHandle`] freed while still open
405 /// is recorded here, surfaced for the playground.
406 ///
407 /// read by [`Vm::get_state`] for the snapshot; written by the heap-decrement
408 /// leak check the `call_stdlib` wiring adds. `pub(crate)` so that check (and
409 /// any future leak source) can append a leak message.
410 pub(crate) leak_log: Vec<String>,
411 /// persistent global slots, indexed by `GET_GLOBAL` / `SET_GLOBAL`. kept
412 /// across REPL calls.
413 globals: Vec<Value>,
414 /// the original source text. used to map a `source_lines` line back to a
415 /// byte-range [`Span`] for runtime errors, and by the REPL pipeline.
416 src: String,
417 /// the accumulated REPL source lines, in acceptance order. each
418 /// [`Vm::repl_eval`] call concatenates every prior accepted line plus the
419 /// new one into one wrapped source string and runs the whole pipeline on
420 /// it -- the accumulating-source REPL. a line that fails to compile is NOT
421 /// appended, so a typo never poisons later evaluations. empty for a VM
422 /// that is not used as a REPL.
423 repl_history: Vec<String>,
424}
425
426impl Vm {
427 /// construct a VM ready to run `program` from its entry point.
428 ///
429 /// pushes the initial [`CallFrame`] for `program.main_index`. `src` is the
430 /// program's source text -- the VM keeps it to build a line-covering span
431 /// for any runtime error and to drive the REPL pipeline.
432 pub fn new(program: Program, src: String) -> Vm {
433 let main_index = program.main_index;
434 let frames = vec![CallFrame {
435 chunk_idx: main_index,
436 ip: 0,
437 base: 0,
438 locals: Vec::new(),
439 }];
440 Vm {
441 program,
442 stack: Vec::new(),
443 frames,
444 heap: Heap::new(),
445 console: Vec::new(),
446 leak_log: Vec::new(),
447 globals: Vec::new(),
448 src,
449 repl_history: Vec::new(),
450 }
451 }
452
453 /// construct an empty VM ready to receive REPL calls.
454 ///
455 /// unlike [`Vm::new`], there is no `Program` and no `main` frame yet -- a
456 /// REPL VM starts blank: an empty program, an empty heap, an empty
457 /// console, an empty `repl_history`. the first [`Vm::repl_eval`] call
458 /// builds the first real `Program` from the first line and runs it; every
459 /// later call rebuilds the program from the whole accumulated source.
460 ///
461 /// the empty `Program` has no chunks, so this VM must not be `run()`
462 /// directly -- `repl_eval` is the only entry point for a REPL VM. the
463 /// persistent `console` / `leak_log` accumulate across `repl_eval` calls.
464 pub fn new_repl() -> Vm {
465 Vm {
466 program: Program::new(),
467 stack: Vec::new(),
468 frames: Vec::new(),
469 heap: Heap::new(),
470 console: Vec::new(),
471 leak_log: Vec::new(),
472 globals: Vec::new(),
473 src: String::new(),
474 repl_history: Vec::new(),
475 }
476 }
477
478 /// the topmost call frame.
479 ///
480 /// returns `Err` when the frame stack is empty -- a VM-internal invariant
481 /// violation (a well-formed run always has at least the `main` frame until
482 /// it returns), surfaced as a `Runtime` error rather than a panic.
483 fn frame(&self) -> Result<&CallFrame, QalaError> {
484 self.frames.last().ok_or_else(|| QalaError::Runtime {
485 span: Span::new(0, 0),
486 message: "no active call frame".to_string(),
487 })
488 }
489
490 /// the topmost call frame, mutably. see [`Vm::frame`] for the empty-stack
491 /// case.
492 fn frame_mut(&mut self) -> Result<&mut CallFrame, QalaError> {
493 self.frames.last_mut().ok_or_else(|| QalaError::Runtime {
494 span: Span::new(0, 0),
495 message: "no active call frame".to_string(),
496 })
497 }
498
499 /// the chunk the topmost frame is running.
500 ///
501 /// returns `Err` when the frame stack is empty or the frame's `chunk_idx`
502 /// is out of range -- both VM-internal invariant violations surfaced as a
503 /// `Runtime` error, never an out-of-bounds index.
504 fn chunk(&self) -> Result<&Chunk, QalaError> {
505 let idx = self.frame()?.chunk_idx;
506 self.program
507 .chunks
508 .get(idx)
509 .ok_or_else(|| QalaError::Runtime {
510 span: Span::new(0, 0),
511 message: format!("call frame references missing chunk {idx}"),
512 })
513 }
514
515 /// push a value onto the value stack.
516 ///
517 /// returns `Err` with "value stack overflow" when the stack is already at
518 /// [`MAX_STACK`] -- a runaway program cannot exhaust host memory through
519 /// the value stack.
520 fn push(&mut self, v: Value) -> Result<(), QalaError> {
521 if self.stack.len() >= MAX_STACK {
522 return Err(self.runtime_err("value stack overflow"));
523 }
524 self.stack.push(v);
525 Ok(())
526 }
527
528 /// pop the top value off the value stack.
529 ///
530 /// returns `Err` with "stack underflow" when the stack is empty -- a
531 /// malformed instruction stream that pops more than it pushes is a
532 /// `Runtime` error, never a panic.
533 fn pop(&mut self) -> Result<Value, QalaError> {
534 self.stack
535 .pop()
536 .ok_or_else(|| self.runtime_err("stack underflow"))
537 }
538
539 /// build a [`QalaError::Runtime`] whose span covers the source line of the
540 /// instruction currently at the topmost frame's `ip`.
541 ///
542 /// the line comes from `chunk.source_lines[ip]`; the byte range of that
543 /// line is found by scanning [`LineIndex`] over `self.src`. when the line
544 /// cannot be resolved (an empty frame stack, a missing chunk, an `ip` past
545 /// the source map, a line number past the source) the span is a harmless
546 /// zero-width span -- this never panics.
547 ///
548 /// `pub(crate)` so the native stdlib (`crate::stdlib`) raises a wrong-type
549 /// or wrong-arity argument as the same line-bearing `Runtime` error every
550 /// opcode handler uses.
551 pub(crate) fn runtime_err(&self, message: &str) -> QalaError {
552 let span = self.error_span();
553 QalaError::Runtime {
554 span,
555 message: message.to_string(),
556 }
557 }
558
559 /// the source span covering the line of the current instruction.
560 ///
561 /// a free function off the error path so [`Vm::runtime_err`] stays a thin
562 /// constructor. on any lookup miss it returns a zero-width span at offset
563 /// 0 -- a runtime error must never fail to be built.
564 fn error_span(&self) -> Span {
565 // the 1-based source line of the current instruction, or 0 on a miss.
566 let line = self
567 .frame()
568 .ok()
569 .and_then(|f| {
570 let ip = f.ip;
571 self.chunk()
572 .ok()
573 .and_then(|c| c.source_lines.get(ip).copied())
574 })
575 .unwrap_or(0);
576 if line == 0 {
577 return Span::new(0, 0);
578 }
579 // find the byte range of that 1-based line in the source.
580 let index = LineIndex::new(&self.src);
581 line_span(&index, &self.src, line)
582 }
583
584 // ---- dispatch ----------------------------------------------------------
585
586 /// decode and execute exactly one instruction at the current frame's `ip`.
587 ///
588 /// the shared core of [`Vm::run`] and [`Vm::step`]; there is no dispatch
589 /// logic anywhere else. the sequence is: bounds-check `ip < code.len()`,
590 /// decode the opcode byte, bounds-check that the whole operand sequence is
591 /// in range, advance `ip` PAST the whole instruction, then run the handler.
592 ///
593 /// ip-advance discipline: `ip` is advanced to the fall-through position
594 /// (`opcode_pos + 1 + operand_bytes`) BEFORE the handler body runs. a JUMP
595 /// handler then overwrites `ip` with its computed target. every other
596 /// handler leaves the advanced `ip` in place. this is why a JUMP target is
597 /// `fall_through + offset` -- `fall_through` is already `opcode_pos + 1 + 2`,
598 /// exactly the "byte after the operand" the offset is relative to.
599 ///
600 /// every byte read here is bounds-checked; a bad opcode byte or a truncated
601 /// operand is a [`QalaError::Runtime`], never a panic.
602 fn dispatch_one(&mut self) -> Result<StepOutcome, QalaError> {
603 let ip = self.frame()?.ip;
604 let code_len = self.chunk()?.code.len();
605 if ip >= code_len {
606 return Err(self.runtime_err("instruction pointer past end of chunk"));
607 }
608 // decode the opcode byte -- guaranteed in range by the check above.
609 let byte = self.chunk()?.code[ip];
610 let op = Opcode::from_u8(byte)
611 .ok_or_else(|| self.runtime_err(&format!("bad opcode byte {byte:#x}")))?;
612 // bounds-check the operand sequence before any read of it.
613 let operand_len = op.operand_bytes() as usize;
614 if ip + 1 + operand_len > code_len {
615 return Err(self.runtime_err("truncated operand"));
616 }
617 // the fall-through ip: one past the whole instruction.
618 let next = ip + 1 + operand_len;
619 // advance FIRST; a JUMP handler below overwrites this.
620 self.frame_mut()?.ip = next;
621 self.run_opcode(op, ip, next)
622 }
623
624 /// run the handler for `op`, whose opcode byte is at `opcode_pos` and whose
625 /// fall-through ip is `next`. split out of [`Vm::dispatch_one`] so the
626 /// decode-and-bounds-check is one place and the per-opcode logic another.
627 ///
628 /// every opcode this plan does not implement returns a clean `Runtime`
629 /// "opcode not yet implemented" error so the match stays exhaustive and the
630 /// crate compiles; a later commit replaces those arms.
631 fn run_opcode(
632 &mut self,
633 op: Opcode,
634 opcode_pos: usize,
635 next: usize,
636 ) -> Result<StepOutcome, QalaError> {
637 match op {
638 // ---- stack ----
639 Opcode::Const => {
640 let idx = self.read_operand_u16(opcode_pos)?;
641 self.op_const(idx)?;
642 }
643 Opcode::Pop => {
644 self.pop()?;
645 }
646 Opcode::Dup => {
647 // peek the top value and push a copy. a Value is Copy, so the
648 // duplicate is a plain bit copy; refcount bookkeeping for a
649 // duplicated heap pointer is a later commit's concern.
650 let top = *self
651 .stack
652 .last()
653 .ok_or_else(|| self.runtime_err("stack underflow"))?;
654 self.push(top)?;
655 }
656 // ---- locals + globals ----
657 Opcode::GetLocal => {
658 let slot = self.read_operand_u16(opcode_pos)? as usize;
659 let v = *self
660 .frame()?
661 .locals
662 .get(slot)
663 .ok_or_else(|| self.runtime_err(&format!("bad local slot {slot}")))?;
664 self.push(v)?;
665 }
666 Opcode::SetLocal => {
667 let slot = self.read_operand_u16(opcode_pos)? as usize;
668 let v = self.pop()?;
669 let locals = &mut self.frame_mut()?.locals;
670 // a SET_LOCAL past the current end grows the vec with void
671 // padding -- codegen numbers slots densely from 0, so the only
672 // way a slot lands past the end is the first write to it.
673 if slot >= locals.len() {
674 locals.resize(slot + 1, Value::void());
675 }
676 locals[slot] = v;
677 }
678 Opcode::GetGlobal => {
679 let idx = self.read_operand_u16(opcode_pos)? as usize;
680 let v = *self
681 .globals
682 .get(idx)
683 .ok_or_else(|| self.runtime_err(&format!("bad global slot {idx}")))?;
684 self.push(v)?;
685 }
686 Opcode::SetGlobal => {
687 let idx = self.read_operand_u16(opcode_pos)? as usize;
688 let v = self.pop()?;
689 if idx >= self.globals.len() {
690 self.globals.resize(idx + 1, Value::void());
691 }
692 self.globals[idx] = v;
693 }
694 // ---- i64 arithmetic ----
695 // every result is checked: an overflow (or i64::MIN / -1, or a
696 // zero divisor) is a Runtime error, never a Rust panic. the result
697 // is a fresh heap Int -- i64 is uniformly heap-boxed.
698 Opcode::Add => self.op_arith_i64(IntOp::Add)?,
699 Opcode::Sub => self.op_arith_i64(IntOp::Sub)?,
700 Opcode::Mul => self.op_arith_i64(IntOp::Mul)?,
701 Opcode::Div => self.op_arith_i64(IntOp::Div)?,
702 Opcode::Mod => self.op_arith_i64(IntOp::Mod)?,
703 Opcode::Neg => {
704 let n = self.pop_i64()?;
705 let r = n
706 .checked_neg()
707 .ok_or_else(|| self.runtime_err("integer overflow"))?;
708 self.push_i64(r)?;
709 }
710 // ---- f64 arithmetic (IEEE 754, no error -- inf / NaN are valid) ----
711 Opcode::FAdd => self.op_arith_f64(FloatOp::Add)?,
712 Opcode::FSub => self.op_arith_f64(FloatOp::Sub)?,
713 Opcode::FMul => self.op_arith_f64(FloatOp::Mul)?,
714 Opcode::FDiv => self.op_arith_f64(FloatOp::Div)?,
715 Opcode::FNeg => {
716 let x = self.pop_f64()?;
717 self.push(Value::from_f64(-x))?;
718 }
719 // ---- comparisons (the VM dispatches by operand type) ----
720 Opcode::Eq => self.op_compare(CmpOp::Eq)?,
721 Opcode::Ne => self.op_compare(CmpOp::Ne)?,
722 Opcode::Lt => self.op_compare(CmpOp::Lt)?,
723 Opcode::Le => self.op_compare(CmpOp::Le)?,
724 Opcode::Gt => self.op_compare(CmpOp::Gt)?,
725 Opcode::Ge => self.op_compare(CmpOp::Ge)?,
726 // ---- f64 comparisons (IEEE 754: NaN compares unequal) ----
727 Opcode::FEq => self.op_compare_f64(CmpOp::Eq)?,
728 Opcode::FNe => self.op_compare_f64(CmpOp::Ne)?,
729 Opcode::FLt => self.op_compare_f64(CmpOp::Lt)?,
730 Opcode::FLe => self.op_compare_f64(CmpOp::Le)?,
731 Opcode::FGt => self.op_compare_f64(CmpOp::Gt)?,
732 Opcode::FGe => self.op_compare_f64(CmpOp::Ge)?,
733 // ---- logic ----
734 Opcode::Not => {
735 let b = self.pop_bool()?;
736 self.push(Value::bool(!b))?;
737 }
738 // ---- control flow ----
739 Opcode::Jump => {
740 let offset = self.read_operand_i16(opcode_pos)?;
741 self.do_jump(next, offset)?;
742 }
743 Opcode::JumpIfFalse => {
744 let offset = self.read_operand_i16(opcode_pos)?;
745 if !self.pop_bool()? {
746 self.do_jump(next, offset)?;
747 }
748 }
749 Opcode::JumpIfTrue => {
750 let offset = self.read_operand_i16(opcode_pos)?;
751 if self.pop_bool()? {
752 self.do_jump(next, offset)?;
753 }
754 }
755 // ---- calls ----
756 Opcode::Call => {
757 let fn_id = self.read_operand_u16(opcode_pos)?;
758 let argc = self.read_operand_u8(opcode_pos)?;
759 self.op_call(fn_id, argc)?;
760 }
761 Opcode::Return => {
762 // a RETURN from the last frame ends the program.
763 if self.op_return()? {
764 return Ok(StepOutcome::Halted);
765 }
766 }
767 // ---- heap construction ----
768 Opcode::MakeArray => {
769 let count = self.read_operand_u16(opcode_pos)?;
770 self.op_make_collection(count, false)?;
771 }
772 Opcode::MakeTuple => {
773 let count = self.read_operand_u16(opcode_pos)?;
774 self.op_make_collection(count, true)?;
775 }
776 Opcode::MakeStruct => {
777 let struct_id = self.read_operand_u16(opcode_pos)?;
778 self.op_make_struct(struct_id)?;
779 }
780 Opcode::MakeEnumVariant => {
781 let variant_id = self.read_operand_u16(opcode_pos)?;
782 let payload_count = self.read_operand_u8(opcode_pos)?;
783 self.op_make_enum_variant(variant_id, payload_count)?;
784 }
785 // ---- access ----
786 Opcode::Index => self.op_index()?,
787 Opcode::Field => {
788 let field_index = self.read_operand_u16(opcode_pos)?;
789 self.op_field(field_index)?;
790 }
791 Opcode::Len => self.op_len()?,
792 // ---- strings ----
793 Opcode::ToStr => self.op_to_str()?,
794 Opcode::ConcatN => {
795 let count = self.read_operand_u16(opcode_pos)?;
796 self.op_concat_n(count)?;
797 }
798 // ---- match dispatch ----
799 Opcode::MatchVariant => {
800 let variant_id = self.read_operand_u16(opcode_pos)?;
801 let offset = self.read_match_variant_offset(opcode_pos)?;
802 self.op_match_variant(variant_id, offset, next)?;
803 }
804 // ---- sentinel ----
805 Opcode::Halt => return Ok(StepOutcome::Halted),
806 }
807 let _ = next;
808 Ok(StepOutcome::Ran)
809 }
810
811 /// read the `u16` operand of the instruction whose opcode byte is at
812 /// `opcode_pos`. the operand sits at `opcode_pos + 1`.
813 ///
814 /// [`Vm::dispatch_one`] has already bounds-checked the whole operand
815 /// sequence, so the two-byte read cannot go out of range -- but this
816 /// re-checks defensively rather than trust the caller, keeping the
817 /// never-panic guarantee local to this function.
818 fn read_operand_u16(&self, opcode_pos: usize) -> Result<u16, QalaError> {
819 let chunk = self.chunk()?;
820 let off = opcode_pos + 1;
821 if off + 2 > chunk.code.len() {
822 return Err(self.runtime_err("truncated operand"));
823 }
824 Ok(chunk.read_u16(off))
825 }
826
827 /// read the trailing `u8` operand of a three-byte instruction (the `argc`
828 /// of [`Opcode::Call`], the `payload_count` of [`Opcode::MakeEnumVariant`])
829 /// whose opcode byte is at `opcode_pos`.
830 ///
831 /// the byte sits at `opcode_pos + 3` -- after the `u16` that precedes it.
832 /// [`Vm::dispatch_one`] has already bounds-checked the whole three-byte
833 /// operand sequence, but this re-checks defensively so the never-panic
834 /// guarantee stays local to this function.
835 fn read_operand_u8(&self, opcode_pos: usize) -> Result<u8, QalaError> {
836 let chunk = self.chunk()?;
837 let off = opcode_pos + 3;
838 chunk
839 .code
840 .get(off)
841 .copied()
842 .ok_or_else(|| self.runtime_err("truncated operand"))
843 }
844
845 /// read the trailing `i16` miss-offset of a [`Opcode::MatchVariant`]
846 /// instruction whose opcode byte is at `opcode_pos`.
847 ///
848 /// MATCH_VARIANT's operand layout is `u16 variant_id` then `i16 offset`, so
849 /// the signed offset sits at `opcode_pos + 3`. [`Vm::dispatch_one`] has
850 /// already bounds-checked the four-byte operand sequence; this re-checks
851 /// the two-byte read defensively so the never-panic guarantee stays local.
852 fn read_match_variant_offset(&self, opcode_pos: usize) -> Result<i16, QalaError> {
853 let chunk = self.chunk()?;
854 let off = opcode_pos + 3;
855 if off + 2 > chunk.code.len() {
856 return Err(self.runtime_err("truncated operand"));
857 }
858 Ok(chunk.read_i16(off))
859 }
860
861 /// the constant-pool entry at `idx` converted to a runtime [`Value`] and
862 /// pushed onto the stack.
863 ///
864 /// an `i64` constant allocates a [`HeapObject::Int`] and pushes a pointer
865 /// (the uniform-heap-box rule); a `str` constant allocates a
866 /// [`HeapObject::Str`]; an `f64` / `bool` / `byte` / `void` becomes a
867 /// tagged scalar directly; a function constant becomes a [`Value::function`]
868 /// tagged scalar carrying the fn-id (no heap object). a bad pool index or a
869 /// heap-exhaustion is a [`QalaError::Runtime`].
870 fn op_const(&mut self, idx: u16) -> Result<(), QalaError> {
871 let constant = self
872 .chunk()?
873 .constants
874 .get(idx as usize)
875 .cloned()
876 .ok_or_else(|| self.runtime_err(&format!("bad constant index {idx}")))?;
877 let value = match constant {
878 ConstValue::I64(n) => {
879 let slot = self
880 .heap
881 .alloc(HeapObject::Int(n))
882 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
883 Value::pointer(slot)
884 }
885 ConstValue::F64(x) => Value::from_f64(x),
886 ConstValue::Bool(b) => Value::bool(b),
887 ConstValue::Byte(b) => Value::byte(b),
888 ConstValue::Void => Value::void(),
889 ConstValue::Str(s) => {
890 let slot = self
891 .heap
892 .alloc(HeapObject::Str(s))
893 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
894 Value::pointer(slot)
895 }
896 // a function value is a tagged scalar, not a heap object: the u16
897 // fn-id rides in the NaN payload. the higher-order stdlib functions
898 // recover it via Value::as_function.
899 ConstValue::Function(id) => Value::function(id),
900 };
901 self.push(value)
902 }
903
904 // ---- typed pops + the i64 push ----------------------------------------
905
906 /// pop a value and decode it as an `i64`.
907 ///
908 /// an `i64` runtime value is a `TAG_PTR` pointer to a [`HeapObject::Int`]
909 /// (the uniform-heap-box rule). a value that is not a pointer, or a pointer
910 /// to a non-`Int` heap object, is a [`QalaError::Runtime`] "expected an
911 /// integer" -- never a panic.
912 fn pop_i64(&mut self) -> Result<i64, QalaError> {
913 let v = self.pop()?;
914 let slot = v
915 .as_pointer()
916 .ok_or_else(|| self.runtime_err("expected an integer"))?;
917 match self.heap.get(slot) {
918 Some(HeapObject::Int(n)) => Ok(*n),
919 _ => Err(self.runtime_err("expected an integer")),
920 }
921 }
922
923 /// pop a value and decode it as an `f64`.
924 ///
925 /// an `f64` is stored verbatim in the value (not heap-boxed). a non-`f64`
926 /// value is a [`QalaError::Runtime`] "expected a float".
927 fn pop_f64(&mut self) -> Result<f64, QalaError> {
928 let v = self.pop()?;
929 v.as_f64()
930 .ok_or_else(|| self.runtime_err("expected a float"))
931 }
932
933 /// pop a value and decode it as a `bool`.
934 ///
935 /// a non-`bool` value is a [`QalaError::Runtime`] "expected a boolean".
936 fn pop_bool(&mut self) -> Result<bool, QalaError> {
937 let v = self.pop()?;
938 v.as_bool()
939 .ok_or_else(|| self.runtime_err("expected a boolean"))
940 }
941
942 /// pop a value and decode it as an owned `String`.
943 ///
944 /// a `str` runtime value is a `TAG_PTR` pointer to a [`HeapObject::Str`].
945 /// the string is cloned out so the caller owns it independently of the
946 /// heap slot. a non-string value is a [`QalaError::Runtime`] "expected a
947 /// string".
948 ///
949 /// `allow(dead_code)`: the string-comparison path in `op_compare` uses the
950 /// heap slot directly; this typed pop is part of the pop-helper family the
951 /// later string opcodes (`CONCAT_N`, `TO_STR`) consume.
952 #[allow(dead_code)]
953 fn pop_str(&mut self) -> Result<String, QalaError> {
954 let v = self.pop()?;
955 let slot = v
956 .as_pointer()
957 .ok_or_else(|| self.runtime_err("expected a string"))?;
958 match self.heap.get(slot) {
959 Some(HeapObject::Str(s)) => Ok(s.clone()),
960 _ => Err(self.runtime_err("expected a string")),
961 }
962 }
963
964 /// allocate a [`HeapObject::Int`] for `n` and push a pointer to it.
965 ///
966 /// the shared tail of every i64-producing opcode: `i64` is uniformly
967 /// heap-boxed, so an integer result is always a fresh heap object reached
968 /// through a pointer. a heap-exhaustion is a [`QalaError::Runtime`].
969 fn push_i64(&mut self, n: i64) -> Result<(), QalaError> {
970 let slot = self
971 .heap
972 .alloc(HeapObject::Int(n))
973 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
974 self.push(Value::pointer(slot))
975 }
976
977 /// read the `i16` operand (a signed jump offset) of the instruction whose
978 /// opcode byte is at `opcode_pos`.
979 ///
980 /// like [`Vm::read_operand_u16`] this re-checks the two-byte read is in
981 /// range rather than trust the caller, keeping the never-panic guarantee
982 /// local.
983 fn read_operand_i16(&self, opcode_pos: usize) -> Result<i16, QalaError> {
984 let chunk = self.chunk()?;
985 let off = opcode_pos + 1;
986 if off + 2 > chunk.code.len() {
987 return Err(self.runtime_err("truncated operand"));
988 }
989 Ok(chunk.read_i16(off))
990 }
991
992 // ---- arithmetic / comparison / jump handlers --------------------------
993
994 /// pop two `i64` operands and push the checked result of `op`.
995 ///
996 /// the deeper stack value is the left operand. every operation goes through
997 /// `i64::checked_*`: an overflow is "integer overflow", a zero divisor (or
998 /// the `i64::MIN / -1` overflow that `checked_div` / `checked_rem` also
999 /// reject) is "division by zero" / "modulo by zero". one `None` check on
1000 /// the checked op covers both faults.
1001 fn op_arith_i64(&mut self, op: IntOp) -> Result<(), QalaError> {
1002 // pop order: rhs is on top, lhs is below it.
1003 let rhs = self.pop_i64()?;
1004 let lhs = self.pop_i64()?;
1005 let result = match op {
1006 IntOp::Add => lhs
1007 .checked_add(rhs)
1008 .ok_or_else(|| self.runtime_err("integer overflow"))?,
1009 IntOp::Sub => lhs
1010 .checked_sub(rhs)
1011 .ok_or_else(|| self.runtime_err("integer overflow"))?,
1012 IntOp::Mul => lhs
1013 .checked_mul(rhs)
1014 .ok_or_else(|| self.runtime_err("integer overflow"))?,
1015 IntOp::Div => lhs
1016 .checked_div(rhs)
1017 .ok_or_else(|| self.runtime_err("division by zero"))?,
1018 IntOp::Mod => lhs
1019 .checked_rem(rhs)
1020 .ok_or_else(|| self.runtime_err("modulo by zero"))?,
1021 };
1022 self.push_i64(result)
1023 }
1024
1025 /// pop two `f64` operands and push the IEEE 754 result of `op`.
1026 ///
1027 /// the deeper stack value is the left operand. float arithmetic never
1028 /// errors -- `FDiv` of a zero divisor yields `inf` / `-inf` / `NaN` per
1029 /// IEEE 754, exactly as plain Rust `f64` `/` does.
1030 fn op_arith_f64(&mut self, op: FloatOp) -> Result<(), QalaError> {
1031 let rhs = self.pop_f64()?;
1032 let lhs = self.pop_f64()?;
1033 let result = match op {
1034 FloatOp::Add => lhs + rhs,
1035 FloatOp::Sub => lhs - rhs,
1036 FloatOp::Mul => lhs * rhs,
1037 FloatOp::Div => lhs / rhs,
1038 };
1039 self.push(Value::from_f64(result))
1040 }
1041
1042 /// pop two values, compare them by their runtime kind, and push the `bool`
1043 /// result of `op`.
1044 ///
1045 /// the two operands must be the same kind. supported kinds: two heap
1046 /// `Int`s (compared as `i64`), two heap `Str`s (compared lexicographically
1047 /// as Rust strings), or two `bool`s. a `bool` ordering follows Rust's
1048 /// `false < true`; the typechecker only emits an ordering comparison on a
1049 /// type it has already accepted, so the runtime ordering is well-defined
1050 /// for every case the typechecker lets through. a kind mismatch between the
1051 /// two operands is a [`QalaError::Runtime`].
1052 fn op_compare(&mut self, op: CmpOp) -> Result<(), QalaError> {
1053 let rhs = self.pop()?;
1054 let lhs = self.pop()?;
1055 let ordering = self.compare_values(lhs, rhs)?;
1056 self.push(Value::bool(op.holds(ordering)))
1057 }
1058
1059 /// the [`Ordering`](std::cmp::Ordering) of two same-kind values.
1060 ///
1061 /// pulled out of [`Vm::op_compare`] so the kind dispatch is one place. a
1062 /// pointer is resolved to its heap object before comparison; a kind
1063 /// mismatch is a [`QalaError::Runtime`].
1064 fn compare_values(&self, lhs: Value, rhs: Value) -> Result<std::cmp::Ordering, QalaError> {
1065 // both bools: order false < true.
1066 if let (Some(a), Some(b)) = (lhs.as_bool(), rhs.as_bool()) {
1067 return Ok(a.cmp(&b));
1068 }
1069 // otherwise both must be pointers to comparable heap objects.
1070 match (lhs.as_pointer(), rhs.as_pointer()) {
1071 (Some(a), Some(b)) => match (self.heap.get(a), self.heap.get(b)) {
1072 (Some(HeapObject::Int(x)), Some(HeapObject::Int(y))) => Ok(x.cmp(y)),
1073 (Some(HeapObject::Str(x)), Some(HeapObject::Str(y))) => Ok(x.cmp(y)),
1074 _ => Err(self.runtime_err("cannot compare values of different types")),
1075 },
1076 _ => Err(self.runtime_err("cannot compare values of different types")),
1077 }
1078 }
1079
1080 /// pop two `f64` operands, compare them per IEEE 754, push the `bool`.
1081 ///
1082 /// IEEE 754 ordering: a `NaN` compares unequal to everything including
1083 /// itself, and is unordered, so `FEq` of two `NaN`s is `false`, `FNe` is
1084 /// `true`, and `FLt` / `FLe` / `FGt` / `FGe` involving a `NaN` are all
1085 /// `false`. this falls out of Rust's `f64` `PartialOrd` / `==` directly.
1086 fn op_compare_f64(&mut self, op: CmpOp) -> Result<(), QalaError> {
1087 let rhs = self.pop_f64()?;
1088 let lhs = self.pop_f64()?;
1089 let result = match op {
1090 CmpOp::Eq => lhs == rhs,
1091 CmpOp::Ne => lhs != rhs,
1092 CmpOp::Lt => lhs < rhs,
1093 CmpOp::Le => lhs <= rhs,
1094 CmpOp::Gt => lhs > rhs,
1095 CmpOp::Ge => lhs >= rhs,
1096 };
1097 self.push(Value::bool(result))
1098 }
1099
1100 /// take a jump: set the current frame's `ip` to `fall_through + offset`.
1101 ///
1102 /// `fall_through` is the byte AFTER the whole jump instruction -- which is
1103 /// exactly the position the codegen's signed `i16` offset is relative to.
1104 /// the computed target must land in `0..=code.len()`; a target outside the
1105 /// chunk is a [`QalaError::Runtime`] "jump target out of range", never a
1106 /// silent out-of-bounds `ip`.
1107 fn do_jump(&mut self, fall_through: usize, offset: i16) -> Result<(), QalaError> {
1108 let code_len = self.chunk()?.code.len();
1109 let target = fall_through as isize + offset as isize;
1110 if target < 0 || target as usize > code_len {
1111 return Err(self.runtime_err("jump target out of range"));
1112 }
1113 self.frame_mut()?.ip = target as usize;
1114 Ok(())
1115 }
1116
1117 // ---- the call machinery -----------------------------------------------
1118
1119 /// execute a [`Opcode::Call`]: dispatch a user function or seam to the
1120 /// stdlib.
1121 ///
1122 /// `fn_id` is the `u16` from the opcode; a `fn_id >= STDLIB_FN_BASE`
1123 /// (40000) is a native stdlib call, handed to [`Vm::call_stdlib`] -- a seam
1124 /// plan 05-05 fills in. otherwise `fn_id` indexes [`Program::chunks`]: the
1125 /// frame-depth cap is checked, then a fresh [`CallFrame`] is pushed for the
1126 /// callee. the `argc` argument values are already on the value stack (the
1127 /// topmost is the rightmost argument); they are moved off the stack into
1128 /// the new frame's `locals` so `locals[0]` is the leftmost argument --
1129 /// matching how `GET_LOCAL` reads `frame.locals` and how codegen numbers
1130 /// parameter slots `0..argc`.
1131 ///
1132 /// the new frame's `base` is the value-stack length AFTER the arguments are
1133 /// removed -- the watermark `RETURN` truncates the stack back to. the
1134 /// caller frame's `ip` already sits at the instruction after the CALL
1135 /// (the ip-advance-first discipline), so `RETURN` resumes it correctly.
1136 fn op_call(&mut self, fn_id: u16, argc: u8) -> Result<(), QalaError> {
1137 // a stdlib fn-id: hand off to the native-dispatch seam.
1138 if fn_id >= STDLIB_FN_BASE {
1139 return self.call_stdlib(fn_id, argc);
1140 }
1141 // a user function: enforce the frame-depth cap before pushing. the VM
1142 // is a `while` loop, so a Qala recursion grows `frames`, not the host
1143 // Rust stack -- this cap turns unbounded recursion into a clean
1144 // `Runtime` error rather than a host stack overflow.
1145 if self.frames.len() >= MAX_FRAMES {
1146 return Err(self.runtime_err("stack overflow"));
1147 }
1148 // resolve the callee chunk, fallibly.
1149 let chunk_idx = fn_id as usize;
1150 if self.program.chunks.get(chunk_idx).is_none() {
1151 return Err(self.runtime_err(&format!("call to missing function {fn_id}")));
1152 }
1153 // move the argc arguments off the value stack into the new frame's
1154 // locals. they sit at the top of the stack, the rightmost on top; the
1155 // split keeps them in stack order so locals[0] is the leftmost arg.
1156 let argc = argc as usize;
1157 if self.stack.len() < argc {
1158 return Err(self.runtime_err("stack underflow building a call frame"));
1159 }
1160 let base = self.stack.len() - argc;
1161 let locals = self.stack.split_off(base);
1162 self.frames.push(CallFrame {
1163 chunk_idx,
1164 ip: 0,
1165 base,
1166 locals,
1167 });
1168 Ok(())
1169 }
1170
1171 /// the stdlib-dispatch seam: run a native standard-library function.
1172 ///
1173 /// a `CALL` whose `fn_id >= STDLIB_FN_BASE` reaches here. the `argc`
1174 /// argument values are on the value stack with the topmost being the
1175 /// rightmost argument; they are moved off the stack into a `Vec<Value>` in
1176 /// source order (`args[0]` the leftmost) and handed to
1177 /// [`crate::stdlib::dispatch`], which runs the native function and returns
1178 /// its result [`Value`]. that result is pushed onto the value stack so the
1179 /// instruction after the `CALL` sees it -- the same one-result contract a
1180 /// user `RETURN` honors (a `void`-returning stdlib function returns
1181 /// [`Value::void`]).
1182 ///
1183 /// a stdlib `Err` is already a [`QalaError::Runtime`]; it propagates
1184 /// unchanged. a wrong-arity or wrong-type call is the native function's
1185 /// clean `Runtime` error, never a panic -- the bytecode is untrusted.
1186 fn call_stdlib(&mut self, fn_id: u16, argc: u8) -> Result<(), QalaError> {
1187 // move the argc arguments off the value stack. they sit at the top, the
1188 // rightmost on top; split_off keeps them in stack order so args[0] is
1189 // the leftmost argument -- matching how a user CALL builds its frame.
1190 let argc = argc as usize;
1191 if self.stack.len() < argc {
1192 return Err(self.runtime_err("stack underflow building a stdlib call"));
1193 }
1194 let at = self.stack.len() - argc;
1195 let args = self.stack.split_off(at);
1196 let result = crate::stdlib::dispatch(self, fn_id, &args)?;
1197 self.push(result)
1198 }
1199
1200 /// execute a [`Opcode::Return`]: pop the current frame and pass the result
1201 /// back.
1202 ///
1203 /// the call's result is whatever sits ABOVE the frame's `base` on the
1204 /// value stack. a value-returning function leaves its result there before
1205 /// the `RETURN`; a `void` function leaves nothing -- codegen emits a bare
1206 /// `RETURN` with no value pushed for a fall-through exit or a `return`
1207 /// with no operand -- so a `RETURN` whose stack is at or below `base`
1208 /// yields a `void` result. this is why the frame is popped FIRST: the
1209 /// result is decided relative to `base`, never by an unconditional
1210 /// `pop` that would underflow on a void function or steal the caller's
1211 /// top-of-stack.
1212 ///
1213 /// the current [`CallFrame`] is popped and the value stack truncated back
1214 /// to that frame's `base`, discarding the callee's transient stack. when
1215 /// no frame remains the program is finished: returns `Ok(true)` so the
1216 /// caller halts the VM. otherwise the result value is pushed onto the
1217 /// (now caller's) stack and `Ok(false)` is returned.
1218 ///
1219 /// codegen emits an explicit `RETURN` at every function exit, including
1220 /// `main`'s -- a `RETURN` from the last frame is therefore the normal end
1221 /// of a program.
1222 ///
1223 /// the returning frame's local file handles are checked for leaks (see
1224 /// [`Vm::check_frame_handle_leaks`]): a [`HeapObject::FileHandle`] that goes
1225 /// out of scope while still open is logged.
1226 fn op_return(&mut self) -> Result<bool, QalaError> {
1227 let frame = self
1228 .frames
1229 .pop()
1230 .ok_or_else(|| self.runtime_err("return with no active call frame"))?;
1231 // the result is whatever the function left above its base. a void
1232 // function left nothing -- its result is void.
1233 let result = if self.stack.len() > frame.base {
1234 self.stack.pop().unwrap_or_else(Value::void)
1235 } else {
1236 Value::void()
1237 };
1238 // the frame's locals are about to go out of scope -- a file handle
1239 // among them that is still open is a resource leak.
1240 self.check_frame_handle_leaks(&frame.locals, result);
1241 // drop the callee's transient value stack back to its base.
1242 if frame.base <= self.stack.len() {
1243 self.stack.truncate(frame.base);
1244 }
1245 if self.frames.is_empty() {
1246 // the last frame returned -- the program is done. leave the result
1247 // on the stack so a test (or the REPL) can inspect a program's
1248 // final value.
1249 self.stack.push(result);
1250 return Ok(true);
1251 }
1252 // hand the result back to the caller.
1253 self.push(result)?;
1254 Ok(false)
1255 }
1256
1257 /// detect file-handle leaks among a returning frame's local slots.
1258 ///
1259 /// a [`HeapObject::FileHandle`] is a resource: a program is expected to
1260 /// `close` it (directly or via `defer close(f)`) before the handle goes out
1261 /// of scope. when a frame returns, each of its local slots that points at a
1262 /// file handle is decremented through [`Heap::dec`]; if a `dec` drives the
1263 /// slot's refcount to zero and the freed object is a `FileHandle` with
1264 /// `closed == false`, the handle was dropped without a `close` -- a leak --
1265 /// and a message naming the path is pushed onto [`Vm::leak_log`], which
1266 /// `get_state` surfaces for the playground.
1267 ///
1268 /// `returned` is the frame's result value: a handle the function RETURNS is
1269 /// not leaked (its lifetime continues in the caller), so a local slot equal
1270 /// to `returned` is skipped.
1271 ///
1272 /// the check is deliberately scoped to file handles. the VM does not run a
1273 /// full reference-counting discipline in v1 -- copies (`DUP`, `GET_LOCAL`,
1274 /// a value stored into a struct or returned) do not `inc`, so a blanket
1275 /// `dec` of every local pointer would free a still-referenced object early.
1276 /// file handles are the one resource type the leak log must report and, in
1277 /// v1 Qala, are never aliased into another live structure, so decrementing
1278 /// exactly the handle locals is both correct and safe. a fuller refcount
1279 /// discipline (and a cycle collector) is a documented v2 concern.
1280 fn check_frame_handle_leaks(&mut self, locals: &[Value], returned: Value) {
1281 for &local in locals {
1282 // only a pointer can reach a heap object; skip a returned handle.
1283 let Some(slot) = local.as_pointer() else {
1284 continue;
1285 };
1286 if local == returned {
1287 continue;
1288 }
1289 // only act on a slot that currently holds an open file handle --
1290 // leave every other heap object's refcount untouched.
1291 let is_open_handle = matches!(
1292 self.heap.get(slot),
1293 Some(HeapObject::FileHandle { closed: false, .. })
1294 );
1295 if !is_open_handle {
1296 continue;
1297 }
1298 // dec the handle slot; dec hands back the freed object when the
1299 // refcount reaches zero, so a still-open freed handle is a leak.
1300 if let Some(HeapObject::FileHandle {
1301 path,
1302 closed: false,
1303 ..
1304 }) = self.heap.dec(slot)
1305 {
1306 self.leak_log
1307 .push(format!("file handle for {path} dropped without close"));
1308 }
1309 }
1310 }
1311
1312 /// call a function `Value` from native (stdlib) code and run it to its
1313 /// `RETURN`, returning the result value.
1314 ///
1315 /// this is the re-entry point the higher-order stdlib functions
1316 /// (`map` / `filter` / `reduce` in 05-05) use to invoke a user callback.
1317 /// `callable` must be a [`Value::function`] -- the callbacks those stdlib
1318 /// functions receive are user functions; a non-function (or a stdlib fn-id
1319 /// callable, unsupported in v1) is a `Runtime` error.
1320 ///
1321 /// the mechanism: record the current frame depth, push the `args` onto the
1322 /// value stack, push a [`CallFrame`] for the callee (frame-depth-capped,
1323 /// exactly as [`Vm::op_call`] does), then loop [`Vm::dispatch_one`] until
1324 /// the frame stack drops back to the recorded depth -- the callee's
1325 /// `RETURN` popped its frame. the callee's result is then on top of the
1326 /// stack. every loop local stays in a Rust local, so a callback that
1327 /// itself calls `call_function_value` is fully re-entrant.
1328 ///
1329 /// `pub(crate)` so `crate::stdlib`'s `map` / `filter` / `reduce` re-enter
1330 /// the VM through this one helper -- those native functions are the only
1331 /// callers (plus the inline test below).
1332 pub(crate) fn call_function_value(
1333 &mut self,
1334 callable: Value,
1335 args: &[Value],
1336 ) -> Result<Value, QalaError> {
1337 let fn_id = callable
1338 .as_function()
1339 .ok_or_else(|| self.runtime_err("value is not callable"))?;
1340 // a stdlib fn-id as a callback is not supported in v1: the callbacks
1341 // map/filter/reduce receive are user functions.
1342 if fn_id >= STDLIB_FN_BASE {
1343 return Err(self.runtime_err("a stdlib function cannot be used as a callback in v1"));
1344 }
1345 let chunk_idx = fn_id as usize;
1346 if self.program.chunks.get(chunk_idx).is_none() {
1347 return Err(self.runtime_err(&format!("call to missing function {fn_id}")));
1348 }
1349 if self.frames.len() >= MAX_FRAMES {
1350 return Err(self.runtime_err("stack overflow"));
1351 }
1352 // the depth the callee's RETURN must drop the frame stack back to.
1353 let depth = self.frames.len();
1354 // push the arguments, then build the callee frame -- the args become
1355 // the frame's locals, locals[0] the first argument.
1356 for arg in args {
1357 self.push(*arg)?;
1358 }
1359 let base = self.stack.len() - args.len();
1360 let locals = self.stack.split_off(base);
1361 self.frames.push(CallFrame {
1362 chunk_idx,
1363 ip: 0,
1364 base,
1365 locals,
1366 });
1367 // run a nested dispatch loop until the callee's frame is popped. a
1368 // Halted outcome before the depth drops back means the callee fell off
1369 // the end without a RETURN -- a malformed-bytecode Runtime error.
1370 loop {
1371 if self.frames.len() == depth {
1372 break;
1373 }
1374 match self.dispatch_one()? {
1375 StepOutcome::Ran => {}
1376 StepOutcome::Halted => {
1377 if self.frames.len() == depth {
1378 break;
1379 }
1380 return Err(self.runtime_err("callback halted without returning"));
1381 }
1382 }
1383 }
1384 // the callee's RETURN pushed its result onto the stack.
1385 self.pop()
1386 }
1387
1388 // ---- heap construction + access ---------------------------------------
1389
1390 /// pop `n` values off the value stack, returned with the deepest popped
1391 /// value first.
1392 ///
1393 /// the `MAKE_*` opcodes push their elements in order, the last on top; a
1394 /// plain pop loop would yield them reversed, so this restores source order:
1395 /// `result[0]` is the first element / field / payload. a stack with fewer
1396 /// than `n` values is a [`QalaError::Runtime`] "stack underflow", never a
1397 /// panic.
1398 fn pop_n(&mut self, n: usize) -> Result<Vec<Value>, QalaError> {
1399 if self.stack.len() < n {
1400 return Err(self.runtime_err("stack underflow building a heap object"));
1401 }
1402 let at = self.stack.len() - n;
1403 Ok(self.stack.split_off(at))
1404 }
1405
1406 /// build a [`HeapObject::Array`] (or [`HeapObject::Tuple`] when `tuple`)
1407 /// from the top `count` stack values and push a pointer to it.
1408 ///
1409 /// the values come off the stack in source order via [`Vm::pop_n`], so
1410 /// element 0 is the first. a heap exhaustion is a `Runtime` error. `MAKE_*`
1411 /// shares this one body because an array and a tuple differ only in which
1412 /// heap variant labels them -- the distinction lets 05-05's `type_of` tell
1413 /// a tuple from an array.
1414 fn op_make_collection(&mut self, count: u16, tuple: bool) -> Result<(), QalaError> {
1415 let elements = self.pop_n(count as usize)?;
1416 let object = if tuple {
1417 HeapObject::Tuple(elements)
1418 } else {
1419 HeapObject::Array(elements)
1420 };
1421 let slot = self
1422 .heap
1423 .alloc(object)
1424 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
1425 self.push(Value::pointer(slot))
1426 }
1427
1428 /// build a [`HeapObject::Struct`] for the struct whose id is `struct_id`
1429 /// and push a pointer to it.
1430 ///
1431 /// `struct_id` indexes [`Program::structs`]; the [`crate::chunk::StructInfo`]
1432 /// there gives the declared `name` (stored as the heap struct's
1433 /// `type_name`, so `type_of` returns the declared name) and the
1434 /// `field_count` (how many values to pop). the field values come off the
1435 /// stack in declaration order. a bad struct id or a heap exhaustion is a
1436 /// `Runtime` error.
1437 fn op_make_struct(&mut self, struct_id: u16) -> Result<(), QalaError> {
1438 let info = self
1439 .program
1440 .structs
1441 .get(struct_id as usize)
1442 .ok_or_else(|| self.runtime_err(&format!("bad struct id {struct_id}")))?;
1443 let type_name = info.name.clone();
1444 let field_count = info.field_count as usize;
1445 let fields = self.pop_n(field_count)?;
1446 let slot = self
1447 .heap
1448 .alloc(HeapObject::Struct { type_name, fields })
1449 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
1450 self.push(Value::pointer(slot))
1451 }
1452
1453 /// build a [`HeapObject::EnumVariant`] for the variant whose id is
1454 /// `variant_id` and push a pointer to it.
1455 ///
1456 /// `variant_id` indexes [`Program::enum_variant_names`], a table of
1457 /// `(enum_name, variant_name)` pairs; the heap object carries both names so
1458 /// `type_of` and the value-to-string routine can render it, and so
1459 /// `MATCH_VARIANT` can compare a scrutinee against an operand variant id by
1460 /// resolving that id to the same name pair. `payload_count` values are
1461 /// popped in declaration order. a bad variant id or a heap exhaustion is a
1462 /// `Runtime` error.
1463 fn op_make_enum_variant(
1464 &mut self,
1465 variant_id: u16,
1466 payload_count: u8,
1467 ) -> Result<(), QalaError> {
1468 let (enum_name, variant_name) = self
1469 .program
1470 .enum_variant_names
1471 .get(variant_id as usize)
1472 .ok_or_else(|| self.runtime_err(&format!("bad variant id {variant_id}")))?;
1473 let type_name = enum_name.clone();
1474 let variant = variant_name.clone();
1475 let payload = self.pop_n(payload_count as usize)?;
1476 let slot = self
1477 .heap
1478 .alloc(HeapObject::EnumVariant {
1479 type_name,
1480 variant,
1481 payload,
1482 })
1483 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
1484 self.push(Value::pointer(slot))
1485 }
1486
1487 /// execute [`Opcode::Index`]: pop an `i64` index then an array/tuple
1488 /// pointer, push the element at that index.
1489 ///
1490 /// the index is the topmost value, the collection below it. the pointer
1491 /// must reach a [`HeapObject::Array`] or [`HeapObject::Tuple`]; anything
1492 /// else is a `Runtime` error. a negative index, or one at or past the
1493 /// length, is a `Runtime` "array index N out of bounds for length L" -- the
1494 /// message names both the index and the length, and the error span covers
1495 /// the `INDEX` opcode's source line, never an out-of-bounds heap read.
1496 fn op_index(&mut self) -> Result<(), QalaError> {
1497 let index = self.pop_i64()?;
1498 let collection = self.pop()?;
1499 let slot = collection
1500 .as_pointer()
1501 .ok_or_else(|| self.runtime_err("expected an array"))?;
1502 let element = match self.heap.get(slot) {
1503 Some(HeapObject::Array(items)) | Some(HeapObject::Tuple(items)) => {
1504 let len = items.len();
1505 if index < 0 || index as usize >= len {
1506 return Err(self.runtime_err(&format!(
1507 "array index {index} out of bounds for length {len}"
1508 )));
1509 }
1510 items[index as usize]
1511 }
1512 _ => return Err(self.runtime_err("expected an array")),
1513 };
1514 self.push(element)
1515 }
1516
1517 /// execute [`Opcode::Field`]: pop a struct pointer, push the field at
1518 /// `field_index`.
1519 ///
1520 /// `field_index` is the field's stable position in the struct's
1521 /// declaration order -- codegen's `struct_field_index`. the pointer must
1522 /// reach a [`HeapObject::Struct`]; a non-struct, or an index past the
1523 /// field count, is a `Runtime` error.
1524 fn op_field(&mut self, field_index: u16) -> Result<(), QalaError> {
1525 let target = self.pop()?;
1526 let slot = target
1527 .as_pointer()
1528 .ok_or_else(|| self.runtime_err("expected a struct"))?;
1529 let field = match self.heap.get(slot) {
1530 Some(HeapObject::Struct { fields, .. }) => fields
1531 .get(field_index as usize)
1532 .copied()
1533 .ok_or_else(|| self.runtime_err(&format!("bad field index {field_index}")))?,
1534 _ => return Err(self.runtime_err("expected a struct")),
1535 };
1536 self.push(field)
1537 }
1538
1539 /// execute [`Opcode::Len`]: pop an array, tuple, or string, push its length
1540 /// as a heap `i64`.
1541 ///
1542 /// an array's / tuple's length is its element count. a string's length is
1543 /// its count of Unicode scalar values (`chars().count()`) -- the
1544 /// user-facing "number of characters", not the UTF-8 byte count, matching
1545 /// what a teaching language's `len` of a string is expected to mean. any
1546 /// other value is a `Runtime` error.
1547 fn op_len(&mut self) -> Result<(), QalaError> {
1548 let value = self.pop()?;
1549 let slot = value
1550 .as_pointer()
1551 .ok_or_else(|| self.runtime_err("expected an array or string"))?;
1552 let len = match self.heap.get(slot) {
1553 Some(HeapObject::Array(items)) | Some(HeapObject::Tuple(items)) => items.len(),
1554 Some(HeapObject::Str(s)) => s.chars().count(),
1555 _ => return Err(self.runtime_err("expected an array or string")),
1556 };
1557 self.push_i64(len as i64)
1558 }
1559
1560 // ---- the value-to-string routine + string opcodes ---------------------
1561
1562 /// render a runtime [`Value`] to its display string.
1563 ///
1564 /// the spelling is locked and matches `ConstValue`'s `Display` for the
1565 /// primitive kinds, so a runtime value renders the same way its
1566 /// compile-time constant would:
1567 /// - an `i64` (a heap `Int`): the decimal form, e.g. `-7`.
1568 /// - an `f64`: non-finite values hand-spelled `NaN` / `inf` / `-inf`
1569 /// (so a `println` of a NaN float shows `NaN`); a finite value uses
1570 /// Rust's default `f64` `Display`.
1571 /// - a `bool`: `true` / `false`.
1572 /// - a `byte`: the plain decimal value (e.g. `65`) -- the readable form for
1573 /// a `println`, NOT `ConstValue`'s `b'\xNN'` disassembly spelling.
1574 /// - `void`: `()`.
1575 /// - a `str` (a heap `Str`): the raw inner text, UNquoted -- `println` of a
1576 /// string shows the string itself, not a quoted literal.
1577 /// - an array `[a, b, c]`, a tuple `(a, b, c)`, a struct
1578 /// `Name { f0, f1 }`, an enum variant `Enum::Variant(p0, p1)` (or just
1579 /// `Enum::Variant` with no payload) -- rendered recursively, up to
1580 /// [`MAX_DISPLAY_DEPTH`] levels deep; beyond that, `"<...>"` is emitted.
1581 /// - a file handle: a `<file "path">` placeholder; the VM does no real I/O.
1582 ///
1583 /// reused by [`Vm::op_to_str`], [`Vm::op_concat_n`], and the native stdlib's
1584 /// `print` / `println`. a dangling pointer (a freed slot) is rendered as
1585 /// `<dangling>` rather than erroring -- a display routine must never fail.
1586 ///
1587 /// `pub(crate)` so `crate::stdlib`'s `print` / `println` render their
1588 /// argument through the one display routine the rest of the VM uses.
1589 pub(crate) fn value_to_string(&self, v: Value) -> String {
1590 self.value_to_string_depth(v, 0)
1591 }
1592
1593 /// depth-bounded inner worker for [`Vm::value_to_string`].
1594 fn value_to_string_depth(&self, v: Value, depth: u32) -> String {
1595 if depth > MAX_DISPLAY_DEPTH {
1596 return "<...>".to_string();
1597 }
1598 // a tagged scalar: bool / byte / void / function decode directly.
1599 if let Some(b) = v.as_bool() {
1600 return if b {
1601 "true".to_string()
1602 } else {
1603 "false".to_string()
1604 };
1605 }
1606 if let Some(b) = v.as_byte() {
1607 return b.to_string();
1608 }
1609 if v.as_void() {
1610 return "()".to_string();
1611 }
1612 if let Some(id) = v.as_function() {
1613 return format!("fn#{id}");
1614 }
1615 // a pointer: dispatch on the heap object.
1616 if let Some(slot) = v.as_pointer() {
1617 return match self.heap.get(slot) {
1618 Some(HeapObject::Int(n)) => n.to_string(),
1619 Some(HeapObject::Str(s)) => s.clone(),
1620 Some(HeapObject::Array(items)) => {
1621 let parts: Vec<String> = items
1622 .iter()
1623 .map(|e| self.value_to_string_depth(*e, depth + 1))
1624 .collect();
1625 format!("[{}]", parts.join(", "))
1626 }
1627 Some(HeapObject::Tuple(items)) => {
1628 let parts: Vec<String> = items
1629 .iter()
1630 .map(|e| self.value_to_string_depth(*e, depth + 1))
1631 .collect();
1632 format!("({})", parts.join(", "))
1633 }
1634 Some(HeapObject::Struct { type_name, fields }) => {
1635 let parts: Vec<String> = fields
1636 .iter()
1637 .map(|e| self.value_to_string_depth(*e, depth + 1))
1638 .collect();
1639 format!("{type_name} {{ {} }}", parts.join(", "))
1640 }
1641 Some(HeapObject::EnumVariant {
1642 type_name,
1643 variant,
1644 payload,
1645 }) => {
1646 if payload.is_empty() {
1647 format!("{type_name}::{variant}")
1648 } else {
1649 let parts: Vec<String> = payload
1650 .iter()
1651 .map(|e| self.value_to_string_depth(*e, depth + 1))
1652 .collect();
1653 format!("{type_name}::{variant}({})", parts.join(", "))
1654 }
1655 }
1656 Some(HeapObject::FileHandle { path, .. }) => format!("<file \"{path}\">"),
1657 None => "<dangling>".to_string(),
1658 };
1659 }
1660 // not tagged and not a pointer: a real f64. hand-spell non-finite
1661 // values to match ConstValue's Display.
1662 match v.as_f64() {
1663 Some(x) if x.is_nan() => "NaN".to_string(),
1664 Some(x) if x == f64::INFINITY => "inf".to_string(),
1665 Some(x) if x == f64::NEG_INFINITY => "-inf".to_string(),
1666 Some(x) => format!("{x}"),
1667 // unreachable: a value is a tagged scalar, a pointer, or an f64.
1668 None => "<unknown>".to_string(),
1669 }
1670 }
1671
1672 /// the runtime type name of a [`Value`].
1673 ///
1674 /// the single source of truth for "what type is this value at runtime",
1675 /// reused by [`Vm::get_state`] (to type-tint each stack slot and variable)
1676 /// AND by `crate::stdlib`'s `type_of` function -- both must agree, so the
1677 /// logic lives here once and `type_of` is `pub(crate)`-reachable. the names
1678 /// match the typechecker's canonical lowercase spelling:
1679 /// - a primitive: `i64`, `f64`, `bool`, `byte`, `void`, `str`.
1680 /// - an array: `[T]` where `T` is the element type of the first element,
1681 /// e.g. `[i64]`. an empty array has no element to inspect, so it renders
1682 /// the bare `[]`.
1683 /// - a tuple: `(T, U, ...)` over the element types, e.g. `(i64, str)`. an
1684 /// empty tuple renders `()`.
1685 /// - a struct: its declared name, e.g. `Point`.
1686 /// - an enum variant: `Enum::Variant`, e.g. `Shape::Circle`.
1687 /// - a file handle: `FileHandle`.
1688 /// - a function value: `fn`.
1689 /// - a dangling pointer (a freed heap slot): `?` -- a type name must
1690 /// always be produced; this never errors.
1691 pub(crate) fn runtime_type_name(&self, v: Value) -> String {
1692 self.runtime_type_name_depth(v, 0)
1693 }
1694
1695 /// depth-bounded inner worker for [`Vm::runtime_type_name`].
1696 fn runtime_type_name_depth(&self, v: Value, depth: u32) -> String {
1697 if depth > MAX_DISPLAY_DEPTH {
1698 return "...".to_string();
1699 }
1700 // tagged scalars decode without touching the heap.
1701 if v.as_bool().is_some() {
1702 return "bool".to_string();
1703 }
1704 if v.as_byte().is_some() {
1705 return "byte".to_string();
1706 }
1707 if v.as_void() {
1708 return "void".to_string();
1709 }
1710 if v.as_function().is_some() {
1711 return "fn".to_string();
1712 }
1713 // a pointer: the heap object decides the type.
1714 if let Some(slot) = v.as_pointer() {
1715 return match self.heap.get(slot) {
1716 Some(HeapObject::Int(_)) => "i64".to_string(),
1717 Some(HeapObject::Str(_)) => "str".to_string(),
1718 Some(HeapObject::Array(items)) => match items.first() {
1719 Some(first) => {
1720 format!("[{}]", self.runtime_type_name_depth(*first, depth + 1))
1721 }
1722 None => "[]".to_string(),
1723 },
1724 Some(HeapObject::Tuple(items)) => {
1725 let parts: Vec<String> = items
1726 .iter()
1727 .map(|e| self.runtime_type_name_depth(*e, depth + 1))
1728 .collect();
1729 format!("({})", parts.join(", "))
1730 }
1731 Some(HeapObject::Struct { type_name, .. }) => type_name.clone(),
1732 Some(HeapObject::EnumVariant {
1733 type_name, variant, ..
1734 }) => format!("{type_name}::{variant}"),
1735 Some(HeapObject::FileHandle { .. }) => "FileHandle".to_string(),
1736 None => "?".to_string(),
1737 };
1738 }
1739 // not tagged, not a pointer: a real f64.
1740 "f64".to_string()
1741 }
1742
1743 /// render a runtime [`Value`] to its `(display string, type name)` pair.
1744 ///
1745 /// the public counterpart of the in-crate [`Vm::value_to_string`] and
1746 /// [`Vm::runtime_type_name`] helpers, for an external consumer that holds a
1747 /// [`Value`] -- specifically the `qala` CLI's REPL, which evaluates a line
1748 /// against this VM and must display the result. the WASM bridge does not use
1749 /// this method (it is in-crate and reaches the two helpers directly, building
1750 /// a `StateValue`); this exists so the separate `qala-cli` crate has the same
1751 /// rendering without the two helpers leaving `pub(crate)`.
1752 ///
1753 /// purely additive: it calls the two existing helpers and changes no v1
1754 /// behavior. never panics -- the helpers are total over every `Value`.
1755 pub fn render_value(&self, v: Value) -> (String, String) {
1756 (self.value_to_string(v), self.runtime_type_name(v))
1757 }
1758
1759 /// execute [`Opcode::ToStr`]: pop one value, push its string form as a
1760 /// heap [`HeapObject::Str`].
1761 ///
1762 /// used to materialise an interpolated segment whose static type is not
1763 /// already `str`. a heap exhaustion is a `Runtime` error.
1764 fn op_to_str(&mut self) -> Result<(), QalaError> {
1765 let v = self.pop()?;
1766 let s = self.value_to_string(v);
1767 let slot = self
1768 .heap
1769 .alloc(HeapObject::Str(s))
1770 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
1771 self.push(Value::pointer(slot))
1772 }
1773
1774 /// execute [`Opcode::ConcatN`]: pop `count` values, concatenate their
1775 /// string forms in source order, push the result as a heap `Str`.
1776 ///
1777 /// the values come off the stack in source order via [`Vm::pop_n`] (the
1778 /// last-pushed is the last segment). each is rendered with
1779 /// [`Vm::value_to_string`], so a string interpolation of a NaN float
1780 /// renders `NaN`. used to materialise string interpolation. a heap
1781 /// exhaustion is a `Runtime` error.
1782 fn op_concat_n(&mut self, count: u16) -> Result<(), QalaError> {
1783 let parts = self.pop_n(count as usize)?;
1784 let mut joined = String::new();
1785 for part in parts {
1786 joined.push_str(&self.value_to_string(part));
1787 }
1788 let slot = self
1789 .heap
1790 .alloc(HeapObject::Str(joined))
1791 .ok_or_else(|| self.runtime_err("heap exhausted"))?;
1792 self.push(Value::pointer(slot))
1793 }
1794
1795 /// execute [`Opcode::MatchVariant`]: test the scrutinee on top of the stack
1796 /// against `variant_id` and either destructure it or branch.
1797 ///
1798 /// `fall_through` is the byte after the whole four-byte-operand instruction
1799 /// -- the position the signed miss `offset` is relative to.
1800 ///
1801 /// the scrutinee must be a [`HeapObject::EnumVariant`]; a non-enum
1802 /// scrutinee is a `Runtime` error. the comparison key is the
1803 /// `(enum_name, variant_name)` pair: the heap `EnumVariant` stores names,
1804 /// not the id, so the operand `variant_id` is resolved through
1805 /// [`Program::enum_variant_names`] to a name pair and the two pairs are
1806 /// compared. on a match the scrutinee is consumed (`pop`) and each payload
1807 /// value is pushed -- the first payload field deepest, the last on top --
1808 /// so the arm's destructuring `SET_LOCAL`s bind them. on a miss the
1809 /// scrutinee is left on the stack and `ip` is set to `fall_through + offset`
1810 /// (bounds-checked into the chunk via [`Vm::do_jump`]) so a `MATCH_VARIANT`
1811 /// chain can re-test the same scrutinee against the next arm.
1812 fn op_match_variant(
1813 &mut self,
1814 variant_id: u16,
1815 offset: i16,
1816 fall_through: usize,
1817 ) -> Result<(), QalaError> {
1818 // peek the scrutinee without consuming it -- a miss must leave it.
1819 let scrutinee = *self
1820 .stack
1821 .last()
1822 .ok_or_else(|| self.runtime_err("stack underflow at a match"))?;
1823 let slot = scrutinee
1824 .as_pointer()
1825 .ok_or_else(|| self.runtime_err("match scrutinee is not an enum value"))?;
1826 // resolve the operand variant id to its (enum, variant) name pair.
1827 let (want_enum, want_variant) = self
1828 .program
1829 .enum_variant_names
1830 .get(variant_id as usize)
1831 .ok_or_else(|| self.runtime_err(&format!("bad variant id {variant_id}")))?
1832 .clone();
1833 // read the scrutinee's own enum/variant names and payload.
1834 let (matches, payload) = match self.heap.get(slot) {
1835 Some(HeapObject::EnumVariant {
1836 type_name,
1837 variant,
1838 payload,
1839 }) => {
1840 let hit = *type_name == want_enum && *variant == want_variant;
1841 (hit, if hit { payload.clone() } else { Vec::new() })
1842 }
1843 _ => {
1844 return Err(self.runtime_err("match scrutinee is not an enum value"));
1845 }
1846 };
1847 if matches {
1848 // consume the scrutinee, push the payload (first field deepest).
1849 self.pop()?;
1850 for value in payload {
1851 self.push(value)?;
1852 }
1853 Ok(())
1854 } else {
1855 // a miss: leave the scrutinee, branch to the next arm.
1856 self.do_jump(fall_through, offset)
1857 }
1858 }
1859
1860 /// run the program from the current state to `Halt` or the first runtime
1861 /// error.
1862 ///
1863 /// loops over [`Vm::dispatch_one`]; a `Ran` outcome continues, a `Halted`
1864 /// outcome returns `Ok(())`. shares every byte of execution logic with
1865 /// [`Vm::step`] -- both go through `dispatch_one`.
1866 pub fn run(&mut self) -> Result<(), QalaError> {
1867 loop {
1868 match self.dispatch_one()? {
1869 StepOutcome::Ran => continue,
1870 StepOutcome::Halted => return Ok(()),
1871 }
1872 }
1873 }
1874
1875 /// advance exactly one instruction.
1876 ///
1877 /// one `step()` call executes one full instruction including its operands:
1878 /// `ip` moves by `1 + operand_bytes()` of the executed opcode. the
1879 /// playground's step-through calls this in a loop. a thin wrapper over
1880 /// [`Vm::dispatch_one`] -- no duplicated dispatch logic.
1881 pub fn step(&mut self) -> Result<StepOutcome, QalaError> {
1882 self.dispatch_one()
1883 }
1884
1885 /// snapshot the VM's execution state for the playground's step-through.
1886 ///
1887 /// the [`VmState`] carries the current chunk index and instruction
1888 /// pointer, the value stack (each slot rendered and type-tagged), the
1889 /// current frame's in-scope variables paired with their REAL source
1890 /// names, the accumulated console output, and the leak log.
1891 ///
1892 /// the output is deterministic -- it iterates `Vec`s in index order, never
1893 /// a `HashMap`, so two `get_state` calls on the same VM state produce
1894 /// byte-identical snapshots (the contract Phase 6's WASM bridge needs).
1895 ///
1896 /// never panics. when `frames` is empty (the program has finished and the
1897 /// last frame returned) it reports a terminal snapshot: the last chunk
1898 /// index and an `ip` one past that chunk's code -- not an out-of-bounds
1899 /// index, not a panic. an out-of-range `chunk_idx` is handled the same
1900 /// defensive way.
1901 pub fn get_state(&self) -> VmState {
1902 // the value stack, bottom-to-top, each slot rendered + type-tagged.
1903 let stack: Vec<StateValue> = self
1904 .stack
1905 .iter()
1906 .map(|v| StateValue {
1907 rendered: self.value_to_string(*v),
1908 type_name: self.runtime_type_name(*v),
1909 })
1910 .collect();
1911
1912 // the current frame decides the chunk index, ip, and variables. an
1913 // empty frame stack is the finished-program case -- a terminal
1914 // snapshot, never a panic.
1915 let (chunk_index, ip, variables) = match self.frames.last() {
1916 Some(frame) => {
1917 let names = self
1918 .program
1919 .chunks
1920 .get(frame.chunk_idx)
1921 .map(|c| c.local_names.as_slice())
1922 .unwrap_or(&[]);
1923 // pair each local slot with its source name; a slot with no
1924 // recorded name (a compiler temporary) falls back to slot{i}.
1925 let variables: Vec<NamedValue> = frame
1926 .locals
1927 .iter()
1928 .enumerate()
1929 .map(|(i, v)| {
1930 let name = match names.get(i) {
1931 Some(n) if !n.is_empty() => n.clone(),
1932 _ => format!("slot{i}"),
1933 };
1934 NamedValue {
1935 name,
1936 value: StateValue {
1937 rendered: self.value_to_string(*v),
1938 type_name: self.runtime_type_name(*v),
1939 },
1940 }
1941 })
1942 .collect();
1943 (frame.chunk_idx, frame.ip, variables)
1944 }
1945 None => {
1946 // the program finished: the last chunk, an ip past its end.
1947 let last_idx = self.program.chunks.len().saturating_sub(1);
1948 let ip = self
1949 .program
1950 .chunks
1951 .get(last_idx)
1952 .map(|c| c.code.len())
1953 .unwrap_or(0);
1954 (last_idx, ip, Vec::new())
1955 }
1956 };
1957
1958 // the 1-based source line of the current instruction, mirroring the
1959 // runtime-error path's lookup. 0 when frames is empty (the terminal
1960 // snapshot), when the chunk is missing, or when ip is past the source
1961 // map -- the playground reads 0 as "no line to highlight".
1962 let current_line = self
1963 .program
1964 .chunks
1965 .get(chunk_index)
1966 .and_then(|c| c.source_lines.get(ip).copied())
1967 .unwrap_or(0) as usize;
1968
1969 VmState {
1970 chunk_index,
1971 ip,
1972 current_line,
1973 stack,
1974 variables,
1975 console: self.console.clone(),
1976 leak_log: self.leak_log.clone(),
1977 }
1978 }
1979
1980 /// evaluate one line of REPL source against the persistent VM state.
1981 ///
1982 /// the accumulating-source REPL: every prior accepted line plus `source`
1983 /// is concatenated into one wrapped program, the whole pipeline (lex,
1984 /// parse, typecheck, codegen) runs on it, and the result executes against
1985 /// this VM. because the whole accumulated program is rebuilt and re-run
1986 /// each call, a `let` binding from an earlier call is simply an earlier
1987 /// statement in the same body and is in scope for the new line -- that is
1988 /// how state persists.
1989 ///
1990 /// the wrapping shape: the accepted statements plus the new line live
1991 /// inside a synthetic `fn __repl_main() is io { ... }`. when the new line
1992 /// is an expression its value is captured via a `let __repl_result =
1993 /// <expr>` binding (the result this method returns); a statement line
1994 /// yields `void`. a line that parses as a top-level item (`fn` / `struct`
1995 /// / `enum` / `interface`) is placed OUTSIDE `__repl_main` as a sibling
1996 /// item and the result is `void`.
1997 ///
1998 /// on a lexer / parser / typechecker / codegen error the error is returned
1999 /// and `source` is NOT appended to the history -- a line that does not
2000 /// compile cannot poison later evaluations. the persistent `console` and
2001 /// `leak_log` survive across calls so output accumulates; the heap is
2002 /// naturally rebuilt each call because the whole program re-runs from
2003 /// scratch (correct and simplest -- there is no stale heap state).
2004 pub fn repl_eval(&mut self, source: &str) -> Result<Value, QalaError> {
2005 let kind = classify_repl_line(source);
2006 let combined = self.build_repl_source(source, kind);
2007
2008 // run the full pipeline on the wrapped accumulated source. any failure
2009 // returns the error WITHOUT touching `repl_history`.
2010 let tokens = crate::lexer::Lexer::tokenize(&combined)?;
2011 let ast = crate::parser::Parser::parse(&tokens)?;
2012 let (typed, type_errors, _warnings) = crate::typechecker::check_program(&ast, &combined);
2013 if let Some(first) = type_errors.into_iter().next() {
2014 return Err(first);
2015 }
2016 let program = crate::codegen::compile_program(&typed, &combined).map_err(|errors| {
2017 errors
2018 .into_iter()
2019 .next()
2020 .unwrap_or_else(|| QalaError::Runtime {
2021 span: Span::new(0, 0),
2022 message: "codegen failed".to_string(),
2023 })
2024 })?;
2025
2026 // run the freshly built program against a reset executable state --
2027 // a fresh value stack, a fresh heap, a frame at the entry point --
2028 // but KEEP the persistent console / leak_log so output accumulates.
2029 let result = self.run_repl_program(program, &combined, kind)?;
2030
2031 // the line compiled and ran: it is now part of the accumulated source.
2032 self.repl_history.push(source.to_string());
2033 Ok(result)
2034 }
2035
2036 /// assemble the combined wrapped source for one REPL call.
2037 ///
2038 /// every history line is re-classified (an item goes outside the synthetic
2039 /// function, a statement / expression inside its body) and joined with the
2040 /// new line. a history expression line becomes a bare expression statement;
2041 /// only the NEW line, when it is an expression, gets the `let
2042 /// __repl_result = ...` capture binding. lines are joined with newlines so
2043 /// source spans stay sane for any diagnostic.
2044 fn build_repl_source(&self, new_line: &str, new_kind: ReplLineKind) -> String {
2045 let mut items = String::new();
2046 let mut body = String::new();
2047 // prior accepted lines: classify each, route items out, the rest in.
2048 for line in &self.repl_history {
2049 match classify_repl_line(line) {
2050 ReplLineKind::Item => {
2051 items.push_str(line);
2052 items.push('\n');
2053 }
2054 ReplLineKind::Expression | ReplLineKind::Statement => {
2055 body.push_str(line);
2056 body.push('\n');
2057 }
2058 }
2059 }
2060 // the new line: an item goes outside; an expression gets the capture
2061 // binding; a statement goes in as-is.
2062 match new_kind {
2063 ReplLineKind::Item => {
2064 items.push_str(new_line);
2065 items.push('\n');
2066 }
2067 ReplLineKind::Expression => {
2068 body.push_str("let ");
2069 body.push_str(REPL_RESULT_NAME);
2070 body.push_str(" = ");
2071 body.push_str(new_line);
2072 body.push('\n');
2073 }
2074 ReplLineKind::Statement => {
2075 body.push_str(new_line);
2076 body.push('\n');
2077 }
2078 }
2079 format!("{items}fn {REPL_ENTRY_NAME}() is io {{\n{body}}}\n")
2080 }
2081
2082 /// run a freshly compiled REPL `program` and recover the result value.
2083 ///
2084 /// resets the executable state (a fresh value stack, a fresh heap, the
2085 /// entry frame) while keeping the persistent `console` / `leak_log`, then
2086 /// runs to completion. the result is the `__repl_result` local of the
2087 /// `__repl_main` frame, captured the instant before that frame's `RETURN`
2088 /// executes; when the line was a statement or an item there is no such
2089 /// local and the result is `void`.
2090 fn run_repl_program(
2091 &mut self,
2092 program: Program,
2093 combined: &str,
2094 kind: ReplLineKind,
2095 ) -> Result<Value, QalaError> {
2096 // the entry chunk: the fn named __repl_main. compile_program sets
2097 // main_index only for a fn named `main`, so locate __repl_main and
2098 // point the VM at it explicitly.
2099 let entry = program
2100 .fn_names
2101 .iter()
2102 .position(|n| n == REPL_ENTRY_NAME)
2103 .ok_or_else(|| QalaError::Runtime {
2104 span: Span::new(0, 0),
2105 message: "repl: the synthetic entry function is missing".to_string(),
2106 })?;
2107 // the slot of __repl_result in the entry chunk, if the new line was an
2108 // expression -- read from the local-names table this VM's codegen built.
2109 let result_slot: Option<usize> = if kind == ReplLineKind::Expression {
2110 program
2111 .chunks
2112 .get(entry)
2113 .and_then(|c| c.local_names.iter().position(|n| n == REPL_RESULT_NAME))
2114 } else {
2115 None
2116 };
2117
2118 // reset the executable state; KEEP console + leak_log.
2119 self.program = program;
2120 self.src = combined.to_string();
2121 self.stack.clear();
2122 self.heap = Heap::new();
2123 self.globals.clear();
2124 self.frames = vec![CallFrame {
2125 chunk_idx: entry,
2126 ip: 0,
2127 base: 0,
2128 locals: Vec::new(),
2129 }];
2130
2131 // run, capturing __repl_result the instant before __repl_main RETURNs.
2132 let mut captured = Value::void();
2133 loop {
2134 // before dispatch: is the top frame __repl_main, about to RETURN?
2135 if let Some(slot) = result_slot
2136 && let Some(frame) = self.frames.last()
2137 && frame.chunk_idx == entry
2138 && let Some(chunk) = self.program.chunks.get(entry)
2139 && chunk.code.get(frame.ip).copied() == Some(Opcode::Return as u8)
2140 && let Some(v) = frame.locals.get(slot)
2141 {
2142 // the value bound to __repl_result is in the frame's locals.
2143 captured = *v;
2144 }
2145 match self.dispatch_one()? {
2146 StepOutcome::Ran => continue,
2147 StepOutcome::Halted => break,
2148 }
2149 }
2150 Ok(captured)
2151 }
2152}
2153
/// name of the synthetic function that serves as the REPL's entry point.
/// every REPL call wraps the accumulated statements in
/// `fn __repl_main() is io { ... }`, and the VM locates the entry chunk by
/// this name.
const REPL_ENTRY_NAME: &str = "__repl_main";
2157
/// name of the synthetic local that carries the REPL's result. an expression
/// line is rewritten to `let __repl_result = <expr>`, which places its value
/// in a named slot the VM can read back after the run.
const REPL_RESULT_NAME: &str = "__repl_result";
2162
/// the shape of one REPL source line; it decides where the line is placed in
/// the wrapped program and whether it produces a result.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReplLineKind {
    /// a top-level definition (`fn` / `struct` / `enum` / `interface`). it is
    /// emitted outside the synthetic entry function, as a sibling item.
    Item,
    /// an expression. it is bound as `let __repl_result = <expr>` so that its
    /// value can be read back as the result.
    Expression,
    /// a statement (`let`, ...). it goes into the body unchanged and the
    /// result is `void`.
    Statement,
}
2177
2178/// classify a REPL source line as an item, an expression, or a statement.
2179///
2180/// the line is probed by trial parse, never by guessing on its first token:
2181/// - first: if wrapping it as `let __t = <line>` parses, it is an
2182/// [`ReplLineKind::Expression`]. this probe runs BEFORE the item probe so
2183/// that a function-call expression (e.g. `double(5)`) that the parser also
2184/// accepts as a complete one-item program is correctly classified as an
2185/// expression and its return value captured.
2186/// - then: if it parses as a complete program with at least one item, it is an
2187/// [`ReplLineKind::Item`] (a `fn` / `struct` / `enum` / `interface`
2188/// definition). a genuine definition cannot parse as `let __t = <line>`, so
2189/// it reaches this probe.
2190/// - else it is a [`ReplLineKind::Statement`] (a `let` binding, or anything
2191/// else -- the real pipeline run surfaces a genuine error for true garbage,
2192/// so misclassifying garbage as a statement is harmless).
2193fn classify_repl_line(line: &str) -> ReplLineKind {
2194 // expression probe first: a line that fits `let __t = <line>` is an
2195 // expression -- covers function-call-shaped lines like `fn_name(x)`.
2196 let probe = format!("fn __probe() is io {{ let __t = {line}\n}}\n");
2197 if let Ok(tokens) = crate::lexer::Lexer::tokenize(&probe)
2198 && crate::parser::Parser::parse(&tokens).is_ok()
2199 {
2200 return ReplLineKind::Expression;
2201 }
2202 // item probe: a line that parses as a whole program with >= 1 item.
2203 if let Ok(tokens) = crate::lexer::Lexer::tokenize(line)
2204 && let Ok(ast) = crate::parser::Parser::parse(&tokens)
2205 && !ast.is_empty()
2206 {
2207 return ReplLineKind::Item;
2208 }
2209 // everything else: a statement.
2210 ReplLineKind::Statement
2211}
2212
/// the result of a single [`Vm::dispatch_one`] call.
///
/// `Ran` means an ordinary instruction executed and the VM should keep
/// going. `Halted` means the VM reached [`Opcode::Halt`] (or fell off the
/// end of `main` once the call machinery lands) and execution is complete.
/// a runtime fault is not a variant here -- it is the `Err` arm of the
/// surrounding `Result`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum StepOutcome {
    /// an ordinary instruction executed; `run` dispatches the next one.
    Ran,
    /// execution is complete; `run` returns `Ok(())`.
    Halted,
}
2226
/// selects the checked `i64` operation that `op_arith_i64` carries out.
///
/// a small private selector: with it the five integer-arithmetic opcodes
/// share a single handler rather than five near-identical bodies.
#[derive(Copy, Clone)]
enum IntOp {
    /// checked `+`.
    Add,
    /// checked `-`.
    Sub,
    /// checked `*`.
    Mul,
    /// checked `/` -- a zero divisor (or `i64::MIN / -1`) is an error.
    Div,
    /// checked `%` -- a zero divisor (or `i64::MIN % -1`) is an error.
    Mod,
}
2244
/// selects the IEEE 754 `f64` operation that `op_arith_f64` carries out.
///
/// the floating-point sibling of [`IntOp`]. it has no `Neg` variant: `FNeg`
/// is a single sign flip and is handled inline.
#[derive(Copy, Clone)]
enum FloatOp {
    /// `+`.
    Add,
    /// `-`.
    Sub,
    /// `*`.
    Mul,
    /// `/` -- never an error; dividing by zero produces `inf` / `NaN`.
    Div,
}
2260
/// selects the comparison that `op_compare` / `op_compare_f64` evaluate.
///
/// one selector serves both the integer/string/bool comparisons and the
/// `f64` comparisons, so the twelve comparison opcodes funnel into two
/// handlers.
#[derive(Copy, Clone)]
enum CmpOp {
    /// `==`
    Eq,
    /// `!=`
    Ne,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,
}

impl CmpOp {
    /// report whether this comparison is satisfied by `ordering`.
    ///
    /// the integer / string / bool path reduces its two operands to a single
    /// [`Ordering`](std::cmp::Ordering) and then asks the `CmpOp` whether it
    /// holds. the `f64` path does not go through here: IEEE 754 has an
    /// unordered case (`NaN`) that an `Ordering` cannot represent, so the
    /// float handler uses `f64`'s own operators directly.
    fn holds(self, ordering: std::cmp::Ordering) -> bool {
        match self {
            CmpOp::Eq => ordering.is_eq(),
            CmpOp::Ne => ordering.is_ne(),
            CmpOp::Lt => ordering.is_lt(),
            CmpOp::Le => ordering.is_le(),
            CmpOp::Gt => ordering.is_gt(),
            CmpOp::Ge => ordering.is_ge(),
        }
    }
}
2301
2302/// the byte-range [`Span`] of a 1-based `line` in `src`.
2303///
2304/// scans for the line's start and end byte offsets. a line number past the end
2305/// of the source yields a zero-width span at the source end -- the caller
2306/// (a runtime-error builder) must never panic.
2307fn line_span(index: &LineIndex, src: &str, line: u32) -> Span {
2308 // the start byte of the line: the offset whose location is (line, 1).
2309 // bytes() scan keeps this allocation-free and UTF-8-correct (line breaks
2310 // are ASCII).
2311 let mut starts: Vec<usize> = vec![0];
2312 for (i, b) in src.bytes().enumerate() {
2313 if b == b'\n' {
2314 starts.push(i + 1);
2315 }
2316 }
2317 let line_idx = (line as usize).saturating_sub(1);
2318 let Some(&start) = starts.get(line_idx) else {
2319 // line past the end of the source: a zero-width span at the end.
2320 return Span::new(src.len(), 0);
2321 };
2322 // the end byte is the start of the next line, or the source end for the
2323 // last line. trim a trailing '\n' / '\r' so the span covers the line text
2324 // itself, not its terminator.
2325 let mut end = starts.get(line_idx + 1).copied().unwrap_or(src.len());
2326 let bytes = src.as_bytes();
2327 while end > start && (bytes[end - 1] == b'\n' || bytes[end - 1] == b'\r') {
2328 end -= 1;
2329 }
2330 // `index` is accepted for API symmetry with the caller's LineIndex; the
2331 // scan above is the authoritative line-start computation.
2332 let _ = index;
2333 Span::new(start, end - start)
2334}
2335
2336#[cfg(test)]
2337mod tests {
2338 use super::*;
2339
2340 /// a one-chunk program whose `main` chunk is `chunk`, named `"main"`.
2341 fn program_with(chunk: Chunk) -> Program {
2342 let mut p = Program::new();
2343 p.chunks.push(chunk);
2344 p.fn_names.push("main".to_string());
2345 p.main_index = 0;
2346 p
2347 }
2348
2349 // ---- heap lifecycle ----
2350 //
2351 // HeapObject has no Debug derive (it carries Value, whose locked derive
2352 // list omits Debug), so these tests compare with `==` and `matches!`
2353 // rather than assert_eq! / assert_ne! on HeapObject-valued expressions.
2354
2355 #[test]
2356 fn heap_alloc_then_get_round_trips_the_object() {
2357 let mut h = Heap::new();
2358 let slot = h.alloc(HeapObject::Int(42)).expect("alloc");
2359 assert!(h.get(slot) == Some(&HeapObject::Int(42)));
2360 }
2361
2362 #[test]
2363 fn heap_alloc_hands_out_distinct_slots() {
2364 let mut h = Heap::new();
2365 let a = h.alloc(HeapObject::Int(1)).expect("alloc a");
2366 let b = h.alloc(HeapObject::Int(2)).expect("alloc b");
2367 assert_ne!(a, b, "two live allocations must get distinct slots");
2368 assert!(h.get(a) == Some(&HeapObject::Int(1)));
2369 assert!(h.get(b) == Some(&HeapObject::Int(2)));
2370 }
2371
2372 #[test]
2373 fn heap_get_of_a_bad_slot_is_none_not_a_panic() {
2374 let h = Heap::new();
2375 assert!(h.get(0).is_none(), "empty heap, slot 0 is out of range");
2376 assert!(h.get(9999).is_none());
2377 }
2378
2379 #[test]
2380 fn heap_get_mut_mutates_the_object_in_place() {
2381 let mut h = Heap::new();
2382 let slot = h.alloc(HeapObject::Str("a".to_string())).expect("alloc");
2383 if let Some(HeapObject::Str(s)) = h.get_mut(slot) {
2384 s.push('b');
2385 }
2386 assert!(h.get(slot) == Some(&HeapObject::Str("ab".to_string())));
2387 }
2388
2389 #[test]
2390 fn heap_inc_then_dec_keeps_the_slot_alive_until_count_reaches_zero() {
2391 let mut h = Heap::new();
2392 let slot = h.alloc(HeapObject::Int(7)).expect("alloc"); // refcount 1
2393 h.inc(slot); // refcount 2
2394 // a dec from 2 -> 1 leaves the slot alive and returns None.
2395 assert!(
2396 h.dec(slot).is_none(),
2397 "dec to a positive count returns None"
2398 );
2399 assert!(h.get(slot) == Some(&HeapObject::Int(7)), "slot still alive");
2400 }
2401
2402 #[test]
2403 fn heap_dec_to_zero_frees_the_slot_and_returns_the_freed_object() {
2404 let mut h = Heap::new();
2405 let slot = h.alloc(HeapObject::Int(99)).expect("alloc"); // refcount 1
2406 // the dec that drives the count to zero hands back the freed object.
2407 assert!(
2408 h.dec(slot) == Some(HeapObject::Int(99)),
2409 "dec to zero returns the freed object"
2410 );
2411 // the freed slot no longer reads as a live object.
2412 assert!(h.get(slot).is_none(), "a freed slot reads as None");
2413 }
2414
2415 #[test]
2416 fn heap_dec_returns_the_freed_file_handle_so_the_caller_can_leak_check() {
2417 // the locked contract: dec hands back the freed object so a caller can
2418 // see a still-open FileHandle and log a leak.
2419 let mut h = Heap::new();
2420 let handle = HeapObject::FileHandle {
2421 path: "data.txt".to_string(),
2422 content: String::new(),
2423 closed: false,
2424 };
2425 let slot = h.alloc(handle.clone()).expect("alloc");
2426 let freed = h.dec(slot).expect("dec to zero returns the object");
2427 // HeapObject has no Debug; the non-FileHandle arm reports in words.
2428 match freed {
2429 HeapObject::FileHandle { closed, path, .. } => {
2430 assert!(!closed, "the freed handle is still open -- a leak");
2431 assert_eq!(path, "data.txt");
2432 }
2433 _ => panic!("expected a FileHandle from dec, got another variant"),
2434 }
2435 }
2436
2437 #[test]
2438 fn heap_dec_of_a_bad_or_freed_slot_is_none_not_a_panic() {
2439 let mut h = Heap::new();
2440 // a slot that was never allocated.
2441 assert!(h.dec(0).is_none());
2442 // a slot freed once cannot be freed again.
2443 let slot = h.alloc(HeapObject::Int(1)).expect("alloc");
2444 assert!(h.dec(slot).is_some(), "first dec frees it");
2445 assert!(
2446 h.dec(slot).is_none(),
2447 "a second dec of a freed slot is None"
2448 );
2449 }
2450
2451 #[test]
2452 fn heap_alloc_reuses_a_freed_slot_before_growing_the_slab() {
2453 let mut h = Heap::new();
2454 let first = h.alloc(HeapObject::Int(1)).expect("alloc first");
2455 // free it, then allocate again -- the new object must reuse the slot.
2456 h.dec(first);
2457 let reused = h.alloc(HeapObject::Int(2)).expect("alloc reused");
2458 assert_eq!(
2459 reused, first,
2460 "a freed slot index is reused by the next alloc"
2461 );
2462 assert!(h.get(reused) == Some(&HeapObject::Int(2)));
2463 }
2464
2465 #[test]
2466 fn heap_inc_of_a_bad_slot_is_a_silent_no_op() {
2467 let mut h = Heap::new();
2468 // inc on a never-allocated slot must not panic and must not create one.
2469 h.inc(0);
2470 h.inc(12345);
2471 assert!(h.get(0).is_none());
2472 }
2473
    // pins the VM's three resource caps to fixed literals. despite the
    // `heap_` prefix in the name, all three limits are checked here.
    #[test]
    fn heap_caps_are_the_documented_values() {
        // the three caps the threat model depends on -- lock them so a future
        // edit that loosens a cap is visible here.
        // presumably the call-frame depth limit -- confirm at the definition.
        assert_eq!(MAX_FRAMES, 1024);
        // the value-stack limit (the push-overflow test fills the stack to it).
        assert_eq!(MAX_STACK, 65536);
        // presumably the live heap-object limit -- confirm at the definition.
        assert_eq!(MAX_HEAP, 1_000_000);
    }
2482
2483 // ---- Vm construction + helpers ----
2484
2485 #[test]
2486 fn vm_new_pushes_the_initial_main_frame() {
2487 let mut p = Program::new();
2488 p.chunks.push(Chunk::new());
2489 p.chunks.push(Chunk::new());
2490 p.fn_names.push("first".to_string());
2491 p.fn_names.push("main".to_string());
2492 p.main_index = 1;
2493 let vm = Vm::new(p, String::new());
2494 // exactly one frame, pointed at main_index, ip 0.
2495 assert_eq!(vm.frames.len(), 1);
2496 let f = vm.frame().expect("the main frame exists");
2497 assert_eq!(f.chunk_idx, 1);
2498 assert_eq!(f.ip, 0);
2499 assert_eq!(f.base, 0);
2500 }
2501
2502 #[test]
2503 fn vm_push_then_pop_round_trips_a_value() {
2504 let vm_program = program_with(Chunk::new());
2505 let mut vm = Vm::new(vm_program, String::new());
2506 vm.push(Value::bool(true)).expect("push");
2507 let v = vm.pop().expect("pop");
2508 assert_eq!(v.as_bool(), Some(true));
2509 }
2510
2511 #[test]
2512 fn vm_pop_on_an_empty_stack_is_a_runtime_underflow_not_a_panic() {
2513 let mut vm = Vm::new(program_with(Chunk::new()), String::new());
2514 // Value has no Debug, so the failure arm cannot print the Ok payload;
2515 // it reports the error variant in words instead.
2516 match vm.pop() {
2517 Err(QalaError::Runtime { message, .. }) => {
2518 assert!(message.contains("underflow"), "got: {message}");
2519 }
2520 Err(other) => panic!("expected a Runtime underflow, got {other:?}"),
2521 Ok(_) => panic!("expected a Runtime underflow, got Ok(value)"),
2522 }
2523 }
2524
2525 #[test]
2526 fn vm_push_past_max_stack_is_a_runtime_overflow_not_a_panic() {
2527 let mut vm = Vm::new(program_with(Chunk::new()), String::new());
2528 // fill the stack directly to the cap, then one more push must error.
2529 vm.stack.resize(MAX_STACK, Value::void());
2530 match vm.push(Value::void()) {
2531 Err(QalaError::Runtime { message, .. }) => {
2532 assert!(message.contains("overflow"), "got: {message}");
2533 }
2534 other => panic!("expected a Runtime overflow, got {other:?}"),
2535 }
2536 }
2537
2538 #[test]
2539 fn vm_runtime_err_carries_the_source_line_of_the_current_instruction() {
2540 // a chunk whose byte 0 maps to source line 3; the error span must
2541 // cover line 3's byte range.
2542 let mut chunk = Chunk::new();
2543 chunk.code.push(0);
2544 chunk.source_lines.push(3);
2545 let src = "line one\nline two\nline three\nline four".to_string();
2546 let vm = Vm::new(program_with(chunk), src.clone());
2547 let err = vm.runtime_err("boom");
2548 let span = err.span();
2549 // line 3 is "line three" -- the span slices back to exactly that text.
2550 assert_eq!(span.slice(&src), "line three");
2551 }
2552
2553 #[test]
2554 fn vm_runtime_err_on_a_missing_source_line_is_a_zero_width_span_not_a_panic() {
2555 // an empty chunk: ip 0 has no source_lines entry. the error must still
2556 // build, with a harmless zero-width span.
2557 let vm = Vm::new(program_with(Chunk::new()), "anything".to_string());
2558 let err = vm.runtime_err("boom");
2559 let span = err.span();
2560 assert_eq!(span.len, 0, "an unresolved line yields a zero-width span");
2561 }
2562
2563 #[test]
2564 fn line_span_of_a_line_past_the_source_is_a_zero_width_span_not_a_panic() {
2565 let src = "only one line";
2566 let index = LineIndex::new(src);
2567 // line 99 does not exist; the span must be zero-width, no panic.
2568 let span = line_span(&index, src, 99);
2569 assert_eq!(span.len, 0);
2570 }
2571
2572 // ---- dispatch + malformed bytecode + stack/local opcodes ----
2573
2574 /// emit a `CONST idx` instruction for the pool entry `v` on `line`, return
2575 /// the pool index. a building block for the dispatch tests.
2576 fn emit_const(chunk: &mut Chunk, v: ConstValue, line: u32) {
2577 let idx = chunk.add_constant(v);
2578 chunk.write_op(Opcode::Const, line);
2579 chunk.write_u16(idx, line);
2580 }
2581
2582 #[test]
2583 fn dispatch_runs_a_const_then_pop_program_clean() {
2584 // CONST 7 ; POP ; HALT -- runs to Halted with an empty stack.
2585 let mut chunk = Chunk::new();
2586 emit_const(&mut chunk, ConstValue::I64(7), 1);
2587 chunk.write_op(Opcode::Pop, 1);
2588 chunk.write_op(Opcode::Halt, 1);
2589 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2590 vm.run().expect("a CONST/POP/HALT program runs clean");
2591 assert!(vm.stack.is_empty(), "POP must leave the stack empty");
2592 }
2593
2594 #[test]
2595 fn dispatch_const_of_an_i64_pushes_a_pointer_to_a_heap_int() {
2596 // CONST 42 ; HALT -- the stack top is a pointer to a HeapObject::Int.
2597 let mut chunk = Chunk::new();
2598 emit_const(&mut chunk, ConstValue::I64(42), 1);
2599 chunk.write_op(Opcode::Halt, 1);
2600 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2601 vm.run().expect("run");
2602 let top = *vm.stack.last().expect("the CONST left a value");
2603 assert_eq!(top.as_function(), None, "an i64 constant is not a function");
2604 let slot = top
2605 .as_pointer()
2606 .expect("an i64 constant must push a heap pointer");
2607 assert!(
2608 vm.heap.get(slot) == Some(&HeapObject::Int(42)),
2609 "the pointer must reach a heap Int(42)"
2610 );
2611 }
2612
2613 #[test]
2614 fn dispatch_const_of_a_bool_pushes_a_tagged_scalar_not_a_pointer() {
2615 let mut chunk = Chunk::new();
2616 emit_const(&mut chunk, ConstValue::Bool(true), 1);
2617 chunk.write_op(Opcode::Halt, 1);
2618 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2619 vm.run().expect("run");
2620 let top = *vm.stack.last().expect("value");
2621 assert_eq!(top.as_bool(), Some(true));
2622 assert_eq!(top.as_pointer(), None, "a bool is not a heap pointer");
2623 }
2624
2625 #[test]
2626 fn dispatch_const_of_a_function_pushes_a_function_value_carrying_the_id() {
2627 // a ConstValue::Function becomes a tagged Value::function, no heap
2628 // object -- the locked representation 05-05's map/filter/reduce read.
2629 let mut chunk = Chunk::new();
2630 emit_const(&mut chunk, ConstValue::Function(13), 1);
2631 chunk.write_op(Opcode::Halt, 1);
2632 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2633 vm.run().expect("run");
2634 let top = *vm.stack.last().expect("value");
2635 assert_eq!(top.as_function(), Some(13), "fn-id must round-trip");
2636 assert_eq!(top.as_pointer(), None, "a function is not a heap pointer");
2637 }
2638
2639 #[test]
2640 fn dispatch_dup_duplicates_the_top_value() {
2641 // CONST true ; DUP ; HALT -- two equal values on the stack.
2642 let mut chunk = Chunk::new();
2643 emit_const(&mut chunk, ConstValue::Bool(true), 1);
2644 chunk.write_op(Opcode::Dup, 1);
2645 chunk.write_op(Opcode::Halt, 1);
2646 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2647 vm.run().expect("run");
2648 assert_eq!(vm.stack.len(), 2, "DUP pushes a copy");
2649 assert!(vm.stack[0] == vm.stack[1], "the copy equals the original");
2650 }
2651
2652 #[test]
2653 fn dispatch_set_local_then_get_local_round_trips_a_value() {
2654 // CONST b'\x05 ; SET_LOCAL 0 ; GET_LOCAL 0 ; HALT.
2655 let mut chunk = Chunk::new();
2656 emit_const(&mut chunk, ConstValue::Byte(5), 1);
2657 chunk.write_op(Opcode::SetLocal, 1);
2658 chunk.write_u16(0, 1);
2659 chunk.write_op(Opcode::GetLocal, 1);
2660 chunk.write_u16(0, 1);
2661 chunk.write_op(Opcode::Halt, 1);
2662 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2663 vm.run().expect("run");
2664 let top = *vm.stack.last().expect("GET_LOCAL pushed a value");
2665 assert_eq!(top.as_byte(), Some(5), "the local round-trips");
2666 }
2667
2668 #[test]
2669 fn dispatch_get_local_of_an_unset_slot_is_a_runtime_error_not_a_panic() {
2670 // GET_LOCAL 4 with no locals set -- a clean Runtime error.
2671 let mut chunk = Chunk::new();
2672 chunk.write_op(Opcode::GetLocal, 1);
2673 chunk.write_u16(4, 1);
2674 chunk.write_op(Opcode::Halt, 1);
2675 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2676 match vm.run() {
2677 Err(QalaError::Runtime { message, .. }) => {
2678 assert!(message.contains("bad local slot"), "got: {message}");
2679 }
2680 other => panic!("expected a Runtime bad-local error, got {other:?}"),
2681 }
2682 }
2683
2684 #[test]
2685 fn dispatch_set_global_then_get_global_round_trips_a_value() {
2686 let mut chunk = Chunk::new();
2687 emit_const(&mut chunk, ConstValue::Bool(false), 1);
2688 chunk.write_op(Opcode::SetGlobal, 1);
2689 chunk.write_u16(0, 1);
2690 chunk.write_op(Opcode::GetGlobal, 1);
2691 chunk.write_u16(0, 1);
2692 chunk.write_op(Opcode::Halt, 1);
2693 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2694 vm.run().expect("run");
2695 let top = *vm.stack.last().expect("GET_GLOBAL pushed a value");
2696 assert_eq!(top.as_bool(), Some(false));
2697 }
2698
2699 #[test]
2700 fn malformed_a_bad_opcode_byte_is_a_runtime_error_not_a_panic() {
2701 // byte 46 decodes to no opcode (the dense set ends at 45).
2702 let mut chunk = Chunk::new();
2703 chunk.code.push(46);
2704 chunk.source_lines.push(1);
2705 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2706 match vm.run() {
2707 Err(QalaError::Runtime { message, .. }) => {
2708 assert!(message.contains("bad opcode byte"), "got: {message}");
2709 }
2710 other => panic!("expected a Runtime bad-opcode error, got {other:?}"),
2711 }
2712 }
2713
2714 #[test]
2715 fn malformed_a_truncated_operand_is_a_runtime_error_not_a_panic() {
2716 // a CONST opcode with only one of its two operand bytes present.
2717 let mut chunk = Chunk::new();
2718 chunk.code.push(Opcode::Const as u8);
2719 chunk.code.push(0); // only one operand byte; CONST needs two
2720 chunk.source_lines.push(1);
2721 chunk.source_lines.push(1);
2722 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2723 match vm.run() {
2724 Err(QalaError::Runtime { message, .. }) => {
2725 assert!(message.contains("truncated operand"), "got: {message}");
2726 }
2727 other => panic!("expected a Runtime truncated-operand error, got {other:?}"),
2728 }
2729 }
2730
2731 #[test]
2732 fn malformed_a_bad_constant_index_is_a_runtime_error_not_a_panic() {
2733 // CONST 9 with an empty constant pool.
2734 let mut chunk = Chunk::new();
2735 chunk.write_op(Opcode::Const, 1);
2736 chunk.write_u16(9, 1);
2737 chunk.write_op(Opcode::Halt, 1);
2738 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2739 match vm.run() {
2740 Err(QalaError::Runtime { message, .. }) => {
2741 assert!(message.contains("bad constant index"), "got: {message}");
2742 }
2743 other => panic!("expected a Runtime bad-constant error, got {other:?}"),
2744 }
2745 }
2746
2747 #[test]
2748 fn a_call_to_a_stdlib_fn_id_dispatches_to_the_native_function() {
2749 // every opcode has a real handler and the `call_stdlib` seam is wired:
2750 // a CALL with fn-id >= STDLIB_FN_BASE runs the native stdlib function.
2751 // fn-id 40001 is `println`; CALL it with a string argument and the
2752 // rendered text lands in the console. (CONST 'hi' ; CALL 40001 ; POP ;
2753 // RETURN -- POP discards println's void result.)
2754 let mut chunk = Chunk::new();
2755 emit_const(&mut chunk, ConstValue::Str("hi".to_string()), 1);
2756 chunk.write_op(Opcode::Call, 1);
2757 chunk.write_u16(STDLIB_FN_BASE + 1, 1);
2758 chunk.code.push(1); // argc 1
2759 chunk.source_lines.push(1);
2760 chunk.write_op(Opcode::Pop, 1);
2761 chunk.write_op(Opcode::Return, 1);
2762 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2763 vm.run().expect("a println CALL runs clean");
2764 assert_eq!(
2765 vm.console,
2766 vec!["hi\n".to_string()],
2767 "the native println wrote its argument to the console"
2768 );
2769 }
2770
2771 #[test]
2772 fn dispatch_an_ip_past_the_end_of_the_chunk_is_a_runtime_error() {
2773 // an empty chunk -- ip 0 is already past the (zero-length) code.
2774 let mut vm = Vm::new(program_with(Chunk::new()), "x".to_string());
2775 match vm.run() {
2776 Err(QalaError::Runtime { message, .. }) => {
2777 assert!(
2778 message.contains("instruction pointer past end"),
2779 "got: {message}"
2780 );
2781 }
2782 other => panic!("expected a Runtime ip-past-end error, got {other:?}"),
2783 }
2784 }
2785
2786 #[test]
2787 fn step_advances_ip_by_one_full_instruction_each_call() {
2788 // CONST 1 (3 bytes) ; POP (1 byte) ; HALT (1 byte).
2789 let mut chunk = Chunk::new();
2790 emit_const(&mut chunk, ConstValue::I64(1), 1);
2791 chunk.write_op(Opcode::Pop, 1);
2792 chunk.write_op(Opcode::Halt, 1);
2793 let mut vm = Vm::new(program_with(chunk), "x".to_string());
2794 // before the first step, ip is 0.
2795 assert_eq!(vm.frame().expect("frame").ip, 0);
2796 // step 1 executes CONST -- a 3-byte instruction, ip moves 0 -> 3.
2797 assert_eq!(vm.step().expect("step 1"), StepOutcome::Ran);
2798 assert_eq!(vm.frame().expect("frame").ip, 3, "CONST advances ip by 3");
2799 assert_eq!(vm.stack.len(), 1, "CONST pushed one value");
2800 // step 2 executes POP -- a 1-byte instruction, ip moves 3 -> 4.
2801 assert_eq!(vm.step().expect("step 2"), StepOutcome::Ran);
2802 assert_eq!(vm.frame().expect("frame").ip, 4, "POP advances ip by 1");
2803 assert_eq!(vm.stack.len(), 0, "POP cleared the stack");
2804 // step 3 executes HALT.
2805 assert_eq!(vm.step().expect("step 3"), StepOutcome::Halted);
2806 }
2807
2808 #[test]
2809 fn run_and_step_share_dispatch_one_reaching_the_same_end_state() {
2810 // a small program run two ways: run() to completion, and step() in a
2811 // loop. both must leave the same value-stack state.
2812 let build = || {
2813 let mut chunk = Chunk::new();
2814 emit_const(&mut chunk, ConstValue::Bool(true), 1);
2815 emit_const(&mut chunk, ConstValue::Bool(false), 1);
2816 chunk.write_op(Opcode::Halt, 1);
2817 program_with(chunk)
2818 };
2819 let mut via_run = Vm::new(build(), "x".to_string());
2820 via_run.run().expect("run");
2821
2822 let mut via_step = Vm::new(build(), "x".to_string());
2823 // step until Halted: the loop body is empty, the work is the step call.
2824 while via_step.step().expect("step") == StepOutcome::Ran {}
2825 assert_eq!(via_run.stack.len(), via_step.stack.len());
2826 assert!(
2827 via_run.stack.len() == 2
2828 && via_run.stack[0] == via_step.stack[0]
2829 && via_run.stack[1] == via_step.stack[1],
2830 "run and step must reach the same stack state"
2831 );
2832 }
2833
2834 // ---- arithmetic / comparison / logic / jump opcodes ----
2835
2836 /// run a chunk and return the VM so the caller can inspect the end state.
2837 fn run_chunk(chunk: Chunk, src: &str) -> Result<Vm, QalaError> {
2838 let mut vm = Vm::new(program_with(chunk), src.to_string());
2839 vm.run()?;
2840 Ok(vm)
2841 }
2842
2843 /// the `i64` the stack top decodes to: a pointer to a heap `Int`.
2844 fn top_i64(vm: &Vm) -> i64 {
2845 let top = *vm.stack.last().expect("a value on the stack");
2846 let slot = top.as_pointer().expect("the result is a heap pointer");
2847 match vm.heap.get(slot) {
2848 Some(HeapObject::Int(n)) => *n,
2849 _ => panic!("the result pointer does not reach a heap Int"),
2850 }
2851 }
2852
2853 /// build `CONST a ; CONST b ; <op> ; HALT` for two i64 literals.
2854 fn binary_i64_chunk(a: i64, b: i64, op: Opcode) -> Chunk {
2855 let mut chunk = Chunk::new();
2856 emit_const(&mut chunk, ConstValue::I64(a), 1);
2857 emit_const(&mut chunk, ConstValue::I64(b), 1);
2858 chunk.write_op(op, 1);
2859 chunk.write_op(Opcode::Halt, 1);
2860 chunk
2861 }
2862
2863 #[test]
2864 fn arith_add_sub_mul_compute_correct_i64_results() {
2865 let add = run_chunk(binary_i64_chunk(20, 22, Opcode::Add), "x").expect("add");
2866 assert_eq!(top_i64(&add), 42);
2867 let sub = run_chunk(binary_i64_chunk(50, 8, Opcode::Sub), "x").expect("sub");
2868 assert_eq!(top_i64(&sub), 42);
2869 let mul = run_chunk(binary_i64_chunk(6, 7, Opcode::Mul), "x").expect("mul");
2870 assert_eq!(top_i64(&mul), 42);
2871 }
2872
2873 #[test]
2874 fn arith_div_and_mod_compute_correct_i64_results() {
2875 // 85 / 2 == 42 (truncated toward zero); 85 % 2 == 1.
2876 let div = run_chunk(binary_i64_chunk(85, 2, Opcode::Div), "x").expect("div");
2877 assert_eq!(top_i64(&div), 42);
2878 let modr = run_chunk(binary_i64_chunk(85, 2, Opcode::Mod), "x").expect("mod");
2879 assert_eq!(top_i64(&modr), 1);
2880 }
2881
2882 #[test]
2883 fn arith_neg_negates_an_i64() {
2884 // CONST 42 ; NEG ; HALT.
2885 let mut chunk = Chunk::new();
2886 emit_const(&mut chunk, ConstValue::I64(42), 1);
2887 chunk.write_op(Opcode::Neg, 1);
2888 chunk.write_op(Opcode::Halt, 1);
2889 let vm = run_chunk(chunk, "x").expect("neg");
2890 assert_eq!(top_i64(&vm), -42);
2891 }
2892
2893 #[test]
2894 fn arith_add_overflow_is_a_runtime_error_not_a_wraparound() {
2895 // i64::MAX + 1 must error, not wrap to i64::MIN.
2896 let chunk = binary_i64_chunk(i64::MAX, 1, Opcode::Add);
2897 match run_chunk(chunk, "x") {
2898 Err(QalaError::Runtime { message, .. }) => {
2899 assert!(message.contains("integer overflow"), "got: {message}");
2900 }
2901 Err(other) => panic!("expected an overflow Runtime error, got {other:?}"),
2902 Ok(_) => panic!("expected an overflow Runtime error, the program ran clean"),
2903 }
2904 }
2905
2906 #[test]
2907 fn arith_neg_of_i64_min_is_a_runtime_overflow() {
2908 // -i64::MIN does not fit in i64.
2909 let mut chunk = Chunk::new();
2910 emit_const(&mut chunk, ConstValue::I64(i64::MIN), 1);
2911 chunk.write_op(Opcode::Neg, 1);
2912 chunk.write_op(Opcode::Halt, 1);
2913 match run_chunk(chunk, "x") {
2914 Err(QalaError::Runtime { message, .. }) => {
2915 assert!(message.contains("integer overflow"), "got: {message}");
2916 }
2917 Err(other) => panic!("expected an overflow Runtime error, got {other:?}"),
2918 Ok(_) => panic!("expected an overflow Runtime error, the program ran clean"),
2919 }
2920 }
2921
2922 #[test]
2923 fn div_by_zero_is_a_runtime_error_carrying_the_div_opcode_source_line() {
2924 // the DIV opcode sits on source line 4; the error span must cover it.
2925 let mut chunk = Chunk::new();
2926 emit_const(&mut chunk, ConstValue::I64(1), 1);
2927 emit_const(&mut chunk, ConstValue::I64(0), 1);
2928 chunk.write_op(Opcode::Div, 4);
2929 chunk.write_op(Opcode::Halt, 4);
2930 let src = "one\ntwo\nthree\nfour line here\nfive";
2931 match run_chunk(chunk, src) {
2932 Err(QalaError::Runtime { message, span }) => {
2933 assert!(message.contains("division by zero"), "got: {message}");
2934 assert_eq!(
2935 span.slice(src),
2936 "four line here",
2937 "the error must point at the DIV's source line"
2938 );
2939 }
2940 Err(other) => panic!("expected a division-by-zero Runtime error, got {other:?}"),
2941 Ok(_) => panic!("expected a division-by-zero error, the program ran clean"),
2942 }
2943 }
2944
2945 #[test]
2946 fn div_by_zero_mod_form_is_a_runtime_modulo_by_zero_error() {
2947 // MOD by a zero divisor reports "modulo by zero", on the MOD's line.
2948 let mut chunk = Chunk::new();
2949 emit_const(&mut chunk, ConstValue::I64(7), 1);
2950 emit_const(&mut chunk, ConstValue::I64(0), 1);
2951 chunk.write_op(Opcode::Mod, 2);
2952 chunk.write_op(Opcode::Halt, 2);
2953 let src = "first line\nsecond line is the mod";
2954 match run_chunk(chunk, src) {
2955 Err(QalaError::Runtime { message, span }) => {
2956 assert!(message.contains("modulo by zero"), "got: {message}");
2957 assert_eq!(span.slice(src), "second line is the mod");
2958 }
2959 Err(other) => panic!("expected a modulo-by-zero Runtime error, got {other:?}"),
2960 Ok(_) => panic!("expected a modulo-by-zero error, the program ran clean"),
2961 }
2962 }
2963
2964 #[test]
2965 fn div_of_i64_min_by_negative_one_is_caught_as_a_runtime_error() {
2966 // i64::MIN / -1 overflows; checked_div returns None and the VM reports
2967 // it on the division-by-zero path (one None check covers both faults).
2968 let chunk = binary_i64_chunk(i64::MIN, -1, Opcode::Div);
2969 match run_chunk(chunk, "x") {
2970 Err(QalaError::Runtime { message, .. }) => {
2971 assert!(message.contains("division by zero"), "got: {message}");
2972 }
2973 Err(other) => panic!("expected a Runtime error for i64::MIN / -1, got {other:?}"),
2974 Ok(_) => panic!("expected a Runtime error for i64::MIN / -1, ran clean"),
2975 }
2976 }
2977
2978 /// build `CONST a ; CONST b ; <op> ; HALT` for two f64 literals.
2979 fn binary_f64_chunk(a: f64, b: f64, op: Opcode) -> Chunk {
2980 let mut chunk = Chunk::new();
2981 emit_const(&mut chunk, ConstValue::F64(a), 1);
2982 emit_const(&mut chunk, ConstValue::F64(b), 1);
2983 chunk.write_op(op, 1);
2984 chunk.write_op(Opcode::Halt, 1);
2985 chunk
2986 }
2987
2988 #[test]
2989 fn float_arith_add_sub_mul_neg_compute_correct_f64_results() {
2990 let add = run_chunk(binary_f64_chunk(1.5, 2.0, Opcode::FAdd), "x").expect("fadd");
2991 assert_eq!(add.stack.last().unwrap().as_f64(), Some(3.5));
2992 let sub = run_chunk(binary_f64_chunk(5.0, 1.5, Opcode::FSub), "x").expect("fsub");
2993 assert_eq!(sub.stack.last().unwrap().as_f64(), Some(3.5));
2994 let mul = run_chunk(binary_f64_chunk(2.0, 1.75, Opcode::FMul), "x").expect("fmul");
2995 assert_eq!(mul.stack.last().unwrap().as_f64(), Some(3.5));
2996 // FNEG flips the sign.
2997 let mut neg = Chunk::new();
2998 emit_const(&mut neg, ConstValue::F64(3.5), 1);
2999 neg.write_op(Opcode::FNeg, 1);
3000 neg.write_op(Opcode::Halt, 1);
3001 let negv = run_chunk(neg, "x").expect("fneg");
3002 assert_eq!(negv.stack.last().unwrap().as_f64(), Some(-3.5));
3003 }
3004
3005 #[test]
3006 fn float_arith_div_by_zero_is_ieee754_inf_not_an_error() {
3007 // 1.0 / 0.0 == inf, with NO runtime error -- IEEE 754.
3008 let vm = run_chunk(binary_f64_chunk(1.0, 0.0, Opcode::FDiv), "x")
3009 .expect("float division by zero must not error");
3010 let top = vm.stack.last().unwrap().as_f64().expect("an f64 result");
3011 assert_eq!(top, f64::INFINITY, "1.0 / 0.0 is positive infinity");
3012 }
3013
3014 #[test]
3015 fn float_arith_zero_over_zero_is_ieee754_nan_not_an_error() {
3016 // 0.0 / 0.0 == NaN, with NO runtime error -- IEEE 754.
3017 let vm = run_chunk(binary_f64_chunk(0.0, 0.0, Opcode::FDiv), "x")
3018 .expect("0.0 / 0.0 must not error");
3019 let top = vm.stack.last().unwrap().as_f64().expect("an f64 result");
3020 assert!(top.is_nan(), "0.0 / 0.0 is NaN");
3021 }
3022
3023 #[test]
3024 fn compare_i64_eq_ne_lt_le_gt_ge_produce_correct_bools() {
3025 // helper: run CONST a ; CONST b ; <cmp> and read the bool result.
3026 let cmp = |a: i64, b: i64, op: Opcode| -> bool {
3027 let vm = run_chunk(binary_i64_chunk(a, b, op), "x").expect("compare");
3028 vm.stack.last().unwrap().as_bool().expect("a bool result")
3029 };
3030 assert!(cmp(3, 3, Opcode::Eq));
3031 assert!(!cmp(3, 4, Opcode::Eq));
3032 assert!(cmp(3, 4, Opcode::Ne));
3033 assert!(cmp(3, 4, Opcode::Lt));
3034 assert!(!cmp(4, 3, Opcode::Lt));
3035 assert!(cmp(3, 3, Opcode::Le));
3036 assert!(cmp(5, 4, Opcode::Gt));
3037 assert!(cmp(4, 4, Opcode::Ge));
3038 assert!(!cmp(3, 4, Opcode::Ge));
3039 }
3040
3041 #[test]
3042 fn compare_str_eq_and_lt_compare_lexicographically() {
3043 // CONST "apple" ; CONST "banana" ; <cmp>.
3044 let cmp = |a: &str, b: &str, op: Opcode| -> bool {
3045 let mut chunk = Chunk::new();
3046 emit_const(&mut chunk, ConstValue::Str(a.to_string()), 1);
3047 emit_const(&mut chunk, ConstValue::Str(b.to_string()), 1);
3048 chunk.write_op(op, 1);
3049 chunk.write_op(Opcode::Halt, 1);
3050 let vm = run_chunk(chunk, "x").expect("str compare");
3051 vm.stack.last().unwrap().as_bool().expect("a bool result")
3052 };
3053 assert!(cmp("apple", "apple", Opcode::Eq));
3054 assert!(!cmp("apple", "banana", Opcode::Eq));
3055 assert!(cmp("apple", "banana", Opcode::Lt), "apple < banana");
3056 assert!(!cmp("banana", "apple", Opcode::Lt));
3057 assert!(cmp("apple", "banana", Opcode::Ne));
3058 }
3059
3060 #[test]
3061 fn compare_bool_eq_and_ordering_follow_false_lt_true() {
3062 let cmp = |a: bool, b: bool, op: Opcode| -> bool {
3063 let mut chunk = Chunk::new();
3064 emit_const(&mut chunk, ConstValue::Bool(a), 1);
3065 emit_const(&mut chunk, ConstValue::Bool(b), 1);
3066 chunk.write_op(op, 1);
3067 chunk.write_op(Opcode::Halt, 1);
3068 let vm = run_chunk(chunk, "x").expect("bool compare");
3069 vm.stack.last().unwrap().as_bool().expect("a bool result")
3070 };
3071 assert!(cmp(true, true, Opcode::Eq));
3072 assert!(cmp(false, true, Opcode::Ne));
3073 // the documented bool ordering: false < true.
3074 assert!(cmp(false, true, Opcode::Lt));
3075 assert!(!cmp(true, false, Opcode::Lt));
3076 }
3077
3078 #[test]
3079 fn compare_mismatched_operand_types_is_a_runtime_error_not_a_panic() {
3080 // CONST 1 (heap Int) ; CONST "x" (heap Str) ; EQ -- a kind mismatch.
3081 let mut chunk = Chunk::new();
3082 emit_const(&mut chunk, ConstValue::I64(1), 1);
3083 emit_const(&mut chunk, ConstValue::Str("x".to_string()), 1);
3084 chunk.write_op(Opcode::Eq, 1);
3085 chunk.write_op(Opcode::Halt, 1);
3086 match run_chunk(chunk, "x") {
3087 Err(QalaError::Runtime { message, .. }) => {
3088 assert!(message.contains("cannot compare"), "got: {message}");
3089 }
3090 Err(other) => panic!("expected a Runtime compare-mismatch error, got {other:?}"),
3091 Ok(_) => panic!("expected a compare-mismatch error, the program ran clean"),
3092 }
3093 }
3094
#[test]
fn compare_f64_eq_lt_ge_follow_ieee754_including_nan() {
    /// runs the chunk `binary_f64_chunk` builds for `a <op> b` and decodes
    /// the bool left on top of the value stack.
    fn float_cmp(a: f64, b: f64, op: Opcode) -> bool {
        let vm = run_chunk(binary_f64_chunk(a, b, op), "x").expect("f64 compare");
        let top = vm.stack.last().unwrap();
        top.as_bool().expect("a bool result")
    }

    // ordinary finite operands.
    assert!(float_cmp(1.5, 1.5, Opcode::FEq));
    assert!(float_cmp(1.0, 2.0, Opcode::FLt));
    assert!(float_cmp(2.0, 2.0, Opcode::FGe));

    // NaN operands: under IEEE 754 a NaN is unequal to itself and unordered
    // against everything.
    let nan = f64::NAN;
    assert!(!float_cmp(nan, nan, Opcode::FEq), "NaN == NaN is false");
    assert!(float_cmp(nan, nan, Opcode::FNe), "NaN != NaN is true");
    assert!(!float_cmp(nan, 1.0, Opcode::FLt), "NaN < x is false");
    assert!(!float_cmp(nan, 1.0, Opcode::FGt), "NaN > x is false");
}
3110
#[test]
fn logic_not_negates_a_bool() {
    // CONST <input> ; NOT ; HALT, once per bool. checking both directions
    // matters: with only `true` as input, a NOT that always pushed `false`
    // would pass.
    for (input, negated) in [(true, false), (false, true)] {
        let mut chunk = Chunk::new();
        emit_const(&mut chunk, ConstValue::Bool(input), 1);
        chunk.write_op(Opcode::Not, 1);
        chunk.write_op(Opcode::Halt, 1);
        let vm = run_chunk(chunk, "x").expect("not");
        assert_eq!(
            vm.stack.last().unwrap().as_bool(),
            Some(negated),
            "NOT {input} must be {negated}"
        );
    }
}
3120
#[test]
fn jumps_jump_moves_ip_over_a_skipped_instruction() {
    // program: CONST true ; JUMP +3 ; CONST false ; HALT.
    // a JUMP offset is relative to the byte after its operand. layout:
    //   bytes 0..2  CONST true
    //   byte  3     JUMP opcode, operand in bytes 4..5, fall-through at 6
    //   bytes 6..8  CONST false -- the instruction being jumped over
    //   byte  9     HALT
    // landing on HALT (byte 9) from fall-through 6 takes an offset of +3.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::Bool(true), 1);
    code.write_op(Opcode::Jump, 1);
    code.write_i16(3, 1);
    emit_const(&mut code, ConstValue::Bool(false), 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("jump");
    // exactly one value -- the `true` -- is on the stack: the `false` CONST
    // never executed.
    let depth = vm.stack.len();
    assert_eq!(depth, 1, "the skipped CONST left nothing");
    assert_eq!(vm.stack[0].as_bool(), Some(true));
}
3139
#[test]
fn jumps_jump_if_false_branches_on_false_and_falls_through_on_true() {
    /// runs CONST <cond> ; JUMP_IF_FALSE +3 ; CONST 111 ; HALT and reports
    /// how many values the program left on the stack.
    fn stack_depth_after(cond: bool) -> usize {
        let mut code = Chunk::new();
        emit_const(&mut code, ConstValue::Bool(cond), 1); // bytes 0..2
        code.write_op(Opcode::JumpIfFalse, 1); // byte 3
        code.write_i16(3, 1); // bytes 4..5, fall-through 6
        emit_const(&mut code, ConstValue::I64(111), 1); // bytes 6..8
        code.write_op(Opcode::Halt, 1); // byte 9
        run_chunk(code, "x").expect("jump_if_false").stack.len()
    }

    // a false condition is consumed and the CONST 111 is jumped over, so the
    // stack ends empty.
    assert_eq!(stack_depth_after(false), 0, "false branches past the CONST");
    // a true condition is consumed and the CONST 111 runs, leaving one value.
    assert_eq!(stack_depth_after(true), 1, "true falls through to the CONST");
}
3159
#[test]
fn jumps_jump_if_true_branches_on_true_and_falls_through_on_false() {
    /// runs CONST <cond> ; JUMP_IF_TRUE +3 ; CONST 222 ; HALT and reports
    /// how many values the program left on the stack.
    fn stack_depth_after(cond: bool) -> usize {
        let mut code = Chunk::new();
        emit_const(&mut code, ConstValue::Bool(cond), 1);
        code.write_op(Opcode::JumpIfTrue, 1);
        code.write_i16(3, 1);
        emit_const(&mut code, ConstValue::I64(222), 1);
        code.write_op(Opcode::Halt, 1);
        run_chunk(code, "x").expect("jump_if_true").stack.len()
    }

    // the mirror image of JUMP_IF_FALSE: a true condition skips the CONST,
    // a false one runs it.
    assert_eq!(stack_depth_after(true), 0, "true branches past the CONST");
    assert_eq!(stack_depth_after(false), 1, "false falls through to the CONST");
}
3176
#[test]
fn jumps_a_backward_jump_lands_at_an_earlier_instruction() {
    // program: CONST true ; JUMP -6. running this to completion would loop
    // forever, so the test single-steps instead and inspects the frame's ip.
    // the JUMP opcode is byte 3 and its fall-through is byte 6, so an offset
    // of -6 targets byte 0.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::Bool(true), 1); // bytes 0..2
    code.write_op(Opcode::Jump, 1); // byte 3
    code.write_i16(-6, 1); // bytes 4..5; fall-through 6, target 0

    let mut vm = Vm::new(program_with(code), "x".to_string());

    vm.step().expect("step the CONST");
    let ip_after_const = vm.frame().expect("frame").ip;
    assert_eq!(ip_after_const, 3, "ip after CONST is 3");

    vm.step().expect("step the JUMP");
    let ip_after_jump = vm.frame().expect("frame").ip;
    assert_eq!(ip_after_jump, 0, "the backward JUMP lands at 0");
}
3196
#[test]
fn jumps_a_jump_target_outside_the_chunk_is_a_runtime_error_not_a_panic() {
    // a lone JUMP whose offset points far beyond the three bytes of code:
    // fall-through is byte 3, so +1000 targets byte 1003.
    let mut code = Chunk::new();
    code.write_op(Opcode::Jump, 1); // byte 0
    code.write_i16(1000, 1); // bytes 1..2

    let message = match run_chunk(code, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime jump-out-of-range error, got {other:?}"),
        Ok(_) => panic!("expected a jump-out-of-range error, the program ran clean"),
    };
    assert!(
        message.contains("jump target out of range"),
        "got: {message}"
    );
}
3214
#[test]
fn arith_add_of_non_integer_operands_is_a_runtime_type_error() {
    // ADD is fed two bools instead of the two heap Ints it expects:
    // CONST true ; CONST false ; ADD ; HALT.
    let mut code = Chunk::new();
    for operand in [true, false] {
        emit_const(&mut code, ConstValue::Bool(operand), 1);
    }
    code.write_op(Opcode::Add, 1);
    code.write_op(Opcode::Halt, 1);

    let message = match run_chunk(code, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime type error, got {other:?}"),
        Ok(_) => panic!("expected a Runtime type error, the program ran clean"),
    };
    assert!(message.contains("expected an integer"), "got: {message}");
}
3231
3232 // ---- CALL / RETURN / the frame-depth cap ----
3233
3234 use crate::lexer::Lexer;
3235 use crate::parser::Parser;
3236 use crate::typechecker::check_program;
3237
3238 /// lex + parse + typecheck + compile a Qala source string into a runnable
3239 /// `Program`. panics on any pipeline error -- the compiled-program tests
3240 /// below all use known-good source.
3241 fn compile_qala(src: &str) -> Program {
3242 let tokens = Lexer::tokenize(src).expect("lex failed");
3243 let ast = Parser::parse(&tokens).expect("parse failed");
3244 let (typed, terrors, _) = check_program(&ast, src);
3245 assert!(terrors.is_empty(), "typecheck errors: {terrors:?}");
3246 crate::codegen::compile_program(&typed, src)
3247 .unwrap_or_else(|e| panic!("codegen errors: {e:?}"))
3248 }
3249
3250 /// the `i64` a finished program left on top of its value stack -- a
3251 /// program's `RETURN` from `main` leaves the result there.
3252 fn program_result_i64(vm: &Vm) -> i64 {
3253 let top = *vm.stack.last().expect("the program left a result value");
3254 let slot = top.as_pointer().expect("the result is a heap pointer");
3255 match vm.heap.get(slot) {
3256 Some(HeapObject::Int(n)) => *n,
3257 _ => panic!("the result pointer does not reach a heap Int"),
3258 }
3259 }
3260
#[test]
fn call_return_a_callee_returns_a_value_onto_the_callers_stack() {
    // two hand-assembled chunks:
    //   chunk 0 (main):   CALL fn 1 argc 0 ; RETURN
    //   chunk 1 (callee): CONST 7 ; RETURN
    // main calls the callee, which returns the i64 7; that 7 must end up as
    // the program's result value.
    let mut callee = Chunk::new();
    emit_const(&mut callee, ConstValue::I64(7), 1);
    callee.write_op(Opcode::Return, 1);

    let mut entry = Chunk::new();
    entry.write_op(Opcode::Call, 1);
    entry.write_u16(1, 1); // fn-id 1
    // the argc operand (0) is pushed as a raw byte, together with a
    // source-line entry for it.
    entry.code.push(0);
    entry.source_lines.push(1);
    entry.write_op(Opcode::Return, 1);

    let mut program = Program::new();
    program.main_index = 0;
    program.chunks.push(entry);
    program.chunks.push(callee);
    program.fn_names.push("main".to_string());
    program.fn_names.push("callee".to_string());

    let mut vm = Vm::new(program, "x".to_string());
    vm.run().expect("the call/return program runs clean");
    // the value main returned is the callee's result.
    let result = program_result_i64(&vm);
    assert_eq!(result, 7);
}
3290
#[test]
fn call_passes_arguments_into_the_callee_frames_local_slots() {
    // two hand-assembled chunks:
    //   chunk 0 (main): CONST 42 ; CALL fn 1 argc 1 ; RETURN
    //   chunk 1 (id):   GET_LOCAL 0 ; RETURN
    // `id` returns whatever sits in its local slot 0, so a program result of
    // 42 shows the argument landed there.
    let mut identity = Chunk::new();
    identity.write_op(Opcode::GetLocal, 1);
    identity.write_u16(0, 1);
    identity.write_op(Opcode::Return, 1);

    let mut entry = Chunk::new();
    emit_const(&mut entry, ConstValue::I64(42), 1);
    entry.write_op(Opcode::Call, 1);
    entry.write_u16(1, 1); // fn-id 1
    // the argc operand (1) is pushed as a raw byte, together with a
    // source-line entry for it.
    entry.code.push(1);
    entry.source_lines.push(1);
    entry.write_op(Opcode::Return, 1);

    let mut program = Program::new();
    program.main_index = 0;
    program.chunks.push(entry);
    program.chunks.push(identity);
    program.fn_names.push("main".to_string());
    program.fn_names.push("id".to_string());

    let mut vm = Vm::new(program, "x".to_string());
    vm.run().expect("run");
    let result = program_result_i64(&vm);
    assert_eq!(result, 42, "the argument reached local 0");
}
3321
#[test]
fn call_to_a_missing_function_is_a_runtime_error_not_a_panic() {
    // the program has a single chunk, yet it CALLs fn-id 9 -- an id with no
    // chunk behind it.
    let mut entry = Chunk::new();
    entry.write_op(Opcode::Call, 1);
    entry.write_u16(9, 1); // fn-id 9
    entry.code.push(0); // argc 0
    entry.source_lines.push(1);
    entry.write_op(Opcode::Return, 1);

    // `Vm` has no Debug impl, so the Ok arm is described in words rather
    // than printed.
    let message = match run_chunk(entry, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime missing-function error, got {other:?}"),
        Ok(_) => panic!("expected a Runtime missing-function error, the program ran clean"),
    };
    assert!(message.contains("missing function"), "got: {message}");
}
3340
#[test]
fn a_stdlib_call_with_a_wrong_argument_count_is_a_clean_runtime_error() {
    // the argc byte of a CALL is untrusted input to the native function on
    // the other side of the `call_stdlib` seam. here `print` -- the stdlib
    // function at fn-id STDLIB_FN_BASE, which takes one parameter -- is
    // called with argc 0. the native arity check must come back as a clean
    // Runtime error, never as a panic.
    let mut entry = Chunk::new();
    entry.write_op(Opcode::Call, 1);
    entry.write_u16(STDLIB_FN_BASE, 1);
    entry.code.push(0); // argc 0, one short of what print expects
    entry.source_lines.push(1);
    entry.write_op(Opcode::Return, 1);

    let message = match run_chunk(entry, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime arity error, got {other:?}"),
        Ok(_) => panic!("a wrong-arity stdlib call must error"),
    };
    assert!(message.contains("expects 1 argument"), "got: {message}");
}
3361
#[test]
fn fibonacci_recursion_computes_the_correct_numeric_result() {
    // end-to-end smoke test for the success criterion: a recursive fib
    // written in Qala goes through the full compiler and then the VM, and
    // fib(10) must come out as 55.
    let src = "\
fn fib(n: i64) -> i64 is pure {
    if n <= 1 { return n }
    return fib(n - 1) + fib(n - 2)
}

fn main() -> i64 is pure {
    return fib(10)
}
";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("the fibonacci program runs clean");
    let result = program_result_i64(&vm);
    assert_eq!(result, 55, "fib(10) must be 55");
}
3381
#[test]
fn deep_recursion_is_a_clean_stack_overflow_runtime_error_with_no_host_panic() {
    // the WASM-safety guarantee. `r` recurses without a base case, so the
    // call depth grows until the VM's MAX_FRAMES cap turns it into a clean
    // Runtime "stack overflow". the VM is a `while` loop, so the recursion
    // grows `frames` rather than the host Rust stack -- there must be no
    // host stack overflow or abort.
    let src = "\
fn r(n: i64) -> i64 is pure {
    return r(n + 1)
}

fn main() -> i64 is pure {
    return r(0)
}
";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    let message = match vm.run() {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime stack-overflow error, got {other:?}"),
        Ok(_) => panic!("unbounded recursion must error, the program ran clean"),
    };
    // getting this far already shows the test process neither panicked nor
    // aborted while recursing.
    assert!(
        message.contains("stack overflow"),
        "unbounded recursion must report a stack overflow, got: {message}"
    );
}
3412
#[test]
fn call_function_value_runs_a_user_callback_and_returns_its_result() {
    // `call_function_value` is how the stdlib re-enters user code. the
    // program here is:
    //   chunk 0 (main):   CONST 0 ; RETURN   (a placeholder entry chunk)
    //   chunk 1 (double): GET_LOCAL 0 ; GET_LOCAL 0 ; ADD ; RETURN
    // calling chunk 1 through a function Value with the argument 21 must
    // return 42.
    let mut doubler = Chunk::new();
    for _ in 0..2 {
        // push local 0 twice so ADD sums the argument with itself.
        doubler.write_op(Opcode::GetLocal, 1);
        doubler.write_u16(0, 1);
    }
    doubler.write_op(Opcode::Add, 1);
    doubler.write_op(Opcode::Return, 1);

    let mut entry = Chunk::new();
    emit_const(&mut entry, ConstValue::I64(0), 1);
    entry.write_op(Opcode::Return, 1);

    let mut program = Program::new();
    program.main_index = 0;
    program.chunks.push(entry);
    program.chunks.push(doubler);
    program.fn_names.push("main".to_string());
    program.fn_names.push("double".to_string());

    let mut vm = Vm::new(program, "x".to_string());
    // the argument is an i64, so it has to live on the heap as an Int.
    let arg_slot = vm.heap.alloc(HeapObject::Int(21)).expect("alloc arg");
    let args = [Value::pointer(arg_slot)];
    let result = vm
        .call_function_value(Value::function(1), &args)
        .expect("the callback runs and returns");

    let result_slot = result.as_pointer().expect("a heap-Int result");
    let doubled = vm.heap.get(result_slot) == Some(&HeapObject::Int(42));
    assert!(doubled, "double(21) must be 42");
}
3450
#[test]
fn call_function_value_of_a_non_function_is_a_runtime_error() {
    // the callee handed in is a bool, which is not something one can call.
    let program = program_with(Chunk::new());
    let mut vm = Vm::new(program, "x".to_string());

    // `Value` has no Debug impl, so the Ok arm is described in words.
    let message = match vm.call_function_value(Value::bool(true), &[]) {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime not-callable error, got {other:?}"),
        Ok(_) => panic!("expected a Runtime not-callable error, got a value"),
    };
    assert!(message.contains("not callable"), "got: {message}");
}
3464
3465 // ---- MAKE_ARRAY / MAKE_TUPLE / MAKE_STRUCT / MAKE_ENUM_VARIANT /
3466 // INDEX / FIELD / LEN ----
3467
#[test]
fn make_array_builds_a_heap_array_then_index_and_len_read_it() {
    // program: CONST 10 ; CONST 20 ; CONST 30 ; MAKE_ARRAY 3 ; DUP ;
    // CONST 1 ; INDEX ; HALT.
    // MAKE_ARRAY pops its elements in reverse, so the array is [10,20,30].
    let mut code = Chunk::new();
    for element in [10, 20, 30] {
        emit_const(&mut code, ConstValue::I64(element), 1);
    }
    code.write_op(Opcode::MakeArray, 1);
    code.write_u16(3, 1);
    // INDEX wants the array with the index above it. DUP keeps a second
    // array pointer on the stack for the heap-shape check below.
    code.write_op(Opcode::Dup, 1);
    emit_const(&mut code, ConstValue::I64(1), 1);
    code.write_op(Opcode::Index, 1);
    // final stack: [array_ptr, 20]. the element count is checked through the
    // heap object rather than through a LEN instruction.
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("make_array");
    // the stack top is what INDEX produced: element 1, i.e. 20.
    assert_eq!(top_i64(&vm), 20, "INDEX 1 of [10,20,30] is 20");

    // one below the top is the array pointer, which must reach a heap Array
    // of three elements.
    let below_top = vm.stack.len() - 2;
    let array_ptr = vm.stack[below_top];
    let slot = array_ptr.as_pointer().expect("an array pointer");
    if let Some(HeapObject::Array(items)) = vm.heap.get(slot) {
        assert_eq!(items.len(), 3);
    } else {
        panic!("MAKE_ARRAY must build a heap Array");
    }
}
3497
#[test]
fn len_of_a_built_array_pushes_the_element_count() {
    // program: CONST 1 ; CONST 2 ; MAKE_ARRAY 2 ; LEN ; HALT. the stack top
    // must be 2.
    let mut code = Chunk::new();
    for element in [1, 2] {
        emit_const(&mut code, ConstValue::I64(element), 1);
    }
    code.write_op(Opcode::MakeArray, 1);
    code.write_u16(2, 1);
    code.write_op(Opcode::Len, 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("len");
    let count = top_i64(&vm);
    assert_eq!(count, 2, "LEN of a 2-element array is 2");
}
3511
#[test]
fn len_of_a_string_counts_unicode_scalar_values() {
    // program: CONST "hello" ; LEN ; HALT. the stack top must be 5.
    // NOTE(review): "hello" is pure ASCII, so this case alone cannot tell a
    // scalar-value count from a byte count -- a multi-byte input would.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::Str(String::from("hello")), 1);
    code.write_op(Opcode::Len, 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("len str");
    let count = top_i64(&vm);
    assert_eq!(count, 5, "LEN of \"hello\" is 5");
}
3522
#[test]
fn make_tuple_builds_a_distinct_heap_tuple_object() {
    // program: CONST 7 ; CONST true ; MAKE_TUPLE 2 ; HALT.
    // MAKE_TUPLE must produce a HeapObject::Tuple, not an Array -- the
    // distinction lets 05-05's type_of tell a tuple from an array.
    let mut chunk = Chunk::new();
    emit_const(&mut chunk, ConstValue::I64(7), 1);
    emit_const(&mut chunk, ConstValue::Bool(true), 1);
    chunk.write_op(Opcode::MakeTuple, 1);
    chunk.write_u16(2, 1);
    chunk.write_op(Opcode::Halt, 1);
    let vm = run_chunk(chunk, "x").expect("make_tuple");
    let top = *vm.stack.last().expect("a tuple pointer");
    let slot = top.as_pointer().expect("a heap pointer");
    match vm.heap.get(slot) {
        Some(HeapObject::Tuple(items)) => {
            assert_eq!(items.len(), 2, "the tuple has two elements");
            // element 0 is the first value pushed, the i64 7. an i64 is
            // heap-boxed, so the element is a pointer to a heap Int. this
            // check pins the element order: without it a tuple built as
            // [true, 7] would only fail by accident.
            let first = items[0]
                .as_pointer()
                .expect("element 0 is a heap pointer");
            assert!(
                matches!(vm.heap.get(first), Some(HeapObject::Int(7))),
                "element 0 must be the first pushed value, 7"
            );
            // element 1 is the bool pushed second.
            assert_eq!(items[1].as_bool(), Some(true));
        }
        _ => panic!("MAKE_TUPLE must build a heap Tuple, not an Array"),
    }
}
3545
#[test]
fn index_out_of_bounds_is_a_runtime_error_carrying_the_index_length_and_line() {
    // a two-element array is indexed at 5. the resulting error has to name
    // both the index and the length, and its span has to cover the source
    // line the INDEX opcode was emitted for.
    let mut code = Chunk::new();
    for element in [100, 200] {
        emit_const(&mut code, ConstValue::I64(element), 1);
    }
    code.write_op(Opcode::MakeArray, 1);
    code.write_u16(2, 1);
    emit_const(&mut code, ConstValue::I64(5), 1);
    code.write_op(Opcode::Index, 4); // INDEX is attributed to source line 4
    code.write_op(Opcode::Halt, 4);

    let src = "one\ntwo\nthree\nthe index line\nfive";
    let (message, span) = match run_chunk(code, src) {
        Err(QalaError::Runtime { message, span }) => (message, span),
        Err(other) => panic!("expected an out-of-bounds Runtime error, got {other:?}"),
        Ok(_) => panic!("expected an out-of-bounds error, the program ran clean"),
    };

    assert!(message.contains('5'), "the index is named: {message}");
    assert!(message.contains('2'), "the length is named: {message}");
    assert!(message.contains("out of bounds"), "got: {message}");
    assert_eq!(
        span.slice(src),
        "the index line",
        "the error must point at the INDEX's source line"
    );
}
3574
#[test]
fn index_of_a_negative_index_is_a_runtime_error_not_a_panic() {
    // a one-element array is indexed at -1. the negative index must neither
    // wrap around nor panic; it must come back as a clean error.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::I64(1), 1);
    code.write_op(Opcode::MakeArray, 1);
    code.write_u16(1, 1);
    emit_const(&mut code, ConstValue::I64(-1), 1);
    code.write_op(Opcode::Index, 1);
    code.write_op(Opcode::Halt, 1);

    let message = match run_chunk(code, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected an out-of-bounds Runtime error, got {other:?}"),
        Ok(_) => panic!("expected an out-of-bounds error, the program ran clean"),
    };
    assert!(message.contains("out of bounds"), "got: {message}");
}
3593
#[test]
fn make_struct_labels_the_struct_with_its_declared_name_and_field_can_read_it() {
    // the program's struct table holds one entry, "Point" with 2 fields.
    // bytecode: CONST 1 ; CONST 2 ; MAKE_STRUCT 0 ; DUP ; FIELD 1 ; HALT.
    // expected: FIELD 1 reads 2, and the heap struct is labelled "Point".
    let mut code = Chunk::new();
    for field_value in [1, 2] {
        emit_const(&mut code, ConstValue::I64(field_value), 1);
    }
    code.write_op(Opcode::MakeStruct, 1);
    code.write_u16(0, 1); // struct id 0
    // DUP: FIELD consumes one pointer, the other stays for the heap check.
    code.write_op(Opcode::Dup, 1);
    code.write_op(Opcode::Field, 1);
    code.write_u16(1, 1); // field index 1
    code.write_op(Opcode::Halt, 1);

    let point = crate::chunk::StructInfo {
        name: "Point".to_string(),
        field_count: 2,
    };
    let mut program = program_with(code);
    program.structs.push(point);

    let mut vm = Vm::new(program, "x".to_string());
    vm.run().expect("make_struct");

    // the stack top is FIELD 1's result: the second field, 2.
    assert_eq!(top_i64(&vm), 2, "FIELD 1 of Point{{1,2}} is 2");

    // one below the top is the struct pointer kept by DUP.
    let below_top = vm.stack.len() - 2;
    let struct_ptr = vm.stack[below_top];
    let slot = struct_ptr.as_pointer().expect("a struct pointer");
    if let Some(HeapObject::Struct { type_name, fields }) = vm.heap.get(slot) {
        assert_eq!(type_name, "Point", "the struct carries its declared name");
        assert_eq!(fields.len(), 2);
    } else {
        panic!("MAKE_STRUCT must build a heap Struct");
    }
}
3629
#[test]
fn make_struct_with_a_bad_struct_id_is_a_runtime_error_not_a_panic() {
    // MAKE_STRUCT names struct id 9 in a program built by `run_chunk`, which
    // this test expects to carry no struct table entry for that id.
    let mut code = Chunk::new();
    code.write_op(Opcode::MakeStruct, 1);
    code.write_u16(9, 1);
    code.write_op(Opcode::Halt, 1);

    let message = match run_chunk(code, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime bad-struct-id error, got {other:?}"),
        Ok(_) => panic!("expected a Runtime bad-struct-id error, the program ran clean"),
    };
    assert!(message.contains("bad struct id"), "got: {message}");
}
3645
#[test]
fn make_enum_variant_builds_a_variant_with_its_enum_and_variant_names() {
    // the program maps variant id 0 to (Shape, Circle).
    // bytecode: CONST 5 ; MAKE_ENUM_VARIANT 0 payload-count 1 ; HALT.
    // the heap object built must carry both names and the one payload value.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::I64(5), 1);
    code.write_op(Opcode::MakeEnumVariant, 1);
    code.write_u16(0, 1); // variant id 0
    // the payload count (1) is pushed as a raw byte, together with a
    // source-line entry for it.
    code.code.push(1);
    code.source_lines.push(1);
    code.write_op(Opcode::Halt, 1);

    let mut program = program_with(code);
    let circle = ("Shape".to_string(), "Circle".to_string());
    program.enum_variant_names.push(circle);

    let mut vm = Vm::new(program, "x".to_string());
    vm.run().expect("make_enum_variant");

    let variant_ptr = vm.stack.last().copied().expect("a variant pointer");
    let slot = variant_ptr.as_pointer().expect("a heap pointer");
    if let Some(HeapObject::EnumVariant {
        type_name,
        variant,
        payload,
    }) = vm.heap.get(slot)
    {
        assert_eq!(type_name, "Shape", "the enum name");
        assert_eq!(variant, "Circle", "the variant name");
        assert_eq!(payload.len(), 1, "Circle carries one payload value");
    } else {
        panic!("MAKE_ENUM_VARIANT must build a heap EnumVariant");
    }
}
3678
#[test]
fn make_enum_variant_with_a_bad_variant_id_is_a_runtime_error_not_a_panic() {
    // MAKE_ENUM_VARIANT names variant id 9 with a payload count of 0; no
    // variant names are registered by this test.
    let mut code = Chunk::new();
    code.write_op(Opcode::MakeEnumVariant, 1);
    code.write_u16(9, 1); // variant id 9
    code.code.push(0); // payload count 0
    code.source_lines.push(1);
    code.write_op(Opcode::Halt, 1);

    let message = match run_chunk(code, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime bad-variant-id error, got {other:?}"),
        Ok(_) => panic!("expected a Runtime bad-variant-id error, the program ran clean"),
    };
    assert!(message.contains("bad variant id"), "got: {message}");
}
3695
#[test]
fn field_of_a_non_struct_is_a_runtime_error_not_a_panic() {
    // program: CONST 1 ; MAKE_ARRAY 1 ; FIELD 0 ; HALT. FIELD is applied to
    // an array pointer, which does not reach a struct.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::I64(1), 1);
    code.write_op(Opcode::MakeArray, 1);
    code.write_u16(1, 1);
    code.write_op(Opcode::Field, 1);
    code.write_u16(0, 1);
    code.write_op(Opcode::Halt, 1);

    let message = match run_chunk(code, "x") {
        Err(QalaError::Runtime { message, .. }) => message,
        Err(other) => panic!("expected a Runtime not-a-struct error, got {other:?}"),
        Ok(_) => panic!("expected a Runtime not-a-struct error, the program ran clean"),
    };
    assert!(message.contains("expected a struct"), "got: {message}");
}
3714
3715 // ---- TO_STR / CONCAT_N / MATCH_VARIANT + value_to_string + defer ----
3716
3717 /// the `String` the stack top decodes to: a pointer to a heap `Str`.
3718 fn top_str(vm: &Vm) -> String {
3719 let top = *vm.stack.last().expect("a value on the stack");
3720 let slot = top.as_pointer().expect("a heap pointer");
3721 match vm.heap.get(slot) {
3722 Some(HeapObject::Str(s)) => s.clone(),
3723 _ => panic!("the result pointer does not reach a heap Str"),
3724 }
3725 }
3726
#[test]
fn to_str_of_an_i64_renders_the_decimal_form() {
    // program: CONST -42 ; TO_STR ; HALT. the stack top must be "-42".
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::I64(-42), 1);
    code.write_op(Opcode::ToStr, 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("to_str i64");
    let rendered = top_str(&vm);
    assert_eq!(rendered, "-42");
}
3736
#[test]
fn to_str_of_a_bool_renders_the_lowercase_keyword() {
    // program: CONST true ; TO_STR ; HALT. the stack top must be "true".
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::Bool(true), 1);
    code.write_op(Opcode::ToStr, 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("to_str bool");
    let rendered = top_str(&vm);
    assert_eq!(rendered, "true");
}
3746
#[test]
fn to_str_of_a_float_hand_spells_nan_and_infinities() {
    /// runs CONST <x> ; TO_STR ; HALT and returns the string left on top.
    fn render(x: f64) -> String {
        let mut code = Chunk::new();
        emit_const(&mut code, ConstValue::F64(x), 1);
        code.write_op(Opcode::ToStr, 1);
        code.write_op(Opcode::Halt, 1);
        let vm = run_chunk(code, "x").expect("to_str f64");
        top_str(&vm)
    }

    // the three non-finite values have fixed spellings (the ones
    // ConstValue's Display is expected to use as well).
    assert_eq!(render(f64::NAN), "NaN", "a NaN renders as NaN");
    assert_eq!(render(f64::INFINITY), "inf");
    assert_eq!(render(f64::NEG_INFINITY), "-inf");
    // a finite value is rendered in the ordinary way.
    assert_eq!(render(3.5), "3.5", "a finite float uses the default form");
}
3762
#[test]
fn value_to_string_renders_each_runtime_value_kind_with_its_locked_spelling() {
    // value_to_string is called directly, one value kind after another, on
    // a VM whose program is an empty chunk.
    let mut vm = Vm::new(program_with(Chunk::new()), "x".to_string());

    // --- tagged scalars ---
    assert_eq!(vm.value_to_string(Value::bool(false)), "false");
    let byte_text = vm.value_to_string(Value::byte(65));
    assert_eq!(byte_text, "65", "a byte is decimal");
    assert_eq!(vm.value_to_string(Value::void()), "()");
    assert_eq!(vm.value_to_string(Value::function(7)), "fn#7");
    assert_eq!(vm.value_to_string(Value::from_f64(2.0)), "2");

    // --- heap objects ---
    // an Int; its pointer is reused below as an element and as a payload.
    let int_slot = vm.heap.alloc(HeapObject::Int(-3)).expect("alloc");
    let int_ptr = Value::pointer(int_slot);
    assert_eq!(vm.value_to_string(int_ptr), "-3");

    // a Str.
    let text = HeapObject::Str("raw text".to_string());
    let str_slot = vm.heap.alloc(text).expect("alloc");
    let str_text = vm.value_to_string(Value::pointer(str_slot));
    assert_eq!(str_text, "raw text", "a string renders unquoted");

    // an Array holding the Int and a bool.
    let array = HeapObject::Array(vec![int_ptr, Value::bool(true)]);
    let arr_slot = vm.heap.alloc(array).expect("alloc");
    assert_eq!(vm.value_to_string(Value::pointer(arr_slot)), "[-3, true]");

    // an enum variant with one payload value.
    let circle = HeapObject::EnumVariant {
        type_name: "Shape".to_string(),
        variant: "Circle".to_string(),
        payload: vec![int_ptr],
    };
    let variant_slot = vm.heap.alloc(circle).expect("alloc");
    assert_eq!(
        vm.value_to_string(Value::pointer(variant_slot)),
        "Shape::Circle(-3)"
    );

    // an enum variant with no payload.
    let red = HeapObject::EnumVariant {
        type_name: "Color".to_string(),
        variant: "Red".to_string(),
        payload: Vec::new(),
    };
    let bare_variant = vm.heap.alloc(red).expect("alloc");
    let bare_text = vm.value_to_string(Value::pointer(bare_variant));
    assert_eq!(
        bare_text, "Color::Red",
        "a payload-less variant renders without parentheses"
    );
}
3823
#[test]
fn value_to_string_depth_limit_prevents_stack_overflow() {
    // an i64 leaf is wrapped in single-element arrays to a nesting depth
    // beyond MAX_DISPLAY_DEPTH. value_to_string must come back without a
    // panic, and the output must contain the "<...>" depth sentinel.
    let mut vm = Vm::new(program_with(Chunk::new()), "x".to_string());
    let leaf = vm.heap.alloc(HeapObject::Int(1)).expect("alloc");

    // two layers more than the cut-off; each layer is a fresh heap Array
    // whose only element is the previous layer.
    let layers = MAX_DISPLAY_DEPTH + 2;
    let outermost = (0..layers).fold(Value::pointer(leaf), |inner, _| {
        let slot = vm
            .heap
            .alloc(HeapObject::Array(vec![inner]))
            .expect("alloc");
        Value::pointer(slot)
    });

    let result = vm.value_to_string(outermost);
    // the outer layers render normally; the sentinel appears further in.
    // reaching the assertion at all means rendering did not overflow the
    // Rust stack.
    assert!(
        result.contains("<...>"),
        "expected depth sentinel in output, got: {result}"
    );
}
3851
#[test]
fn concat_n_joins_several_values_into_one_string() {
    // program: CONST "a=" ; CONST 1 ; TO_STR ; CONST "!" ; CONCAT_N 3 ; HALT.
    // the three parts must be joined in the order they were pushed: "a=1!".
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::Str(String::from("a=")), 1);
    emit_const(&mut code, ConstValue::I64(1), 1);
    code.write_op(Opcode::ToStr, 1);
    emit_const(&mut code, ConstValue::Str(String::from("!")), 1);
    code.write_op(Opcode::ConcatN, 1);
    code.write_u16(3, 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("concat_n");
    let joined = top_str(&vm);
    assert_eq!(joined, "a=1!", "CONCAT_N joins in source order");
}
3866
#[test]
fn concat_n_of_an_interpolated_nan_float_renders_the_word_nan() {
    // the VM-04 guarantee, checked at the CONCAT_N layer: interpolating a
    // NaN float into a string yields the word "NaN", not a numeric token.
    // program: CONST "v=" ; CONST NaN ; CONCAT_N 2 ; HALT. there is no
    // TO_STR before the CONCAT_N, which stringifies each part itself.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::Str(String::from("v=")), 1);
    emit_const(&mut code, ConstValue::F64(f64::NAN), 1);
    code.write_op(Opcode::ConcatN, 1);
    code.write_u16(2, 1);
    code.write_op(Opcode::Halt, 1);

    let vm = run_chunk(code, "x").expect("concat_n nan");
    let joined = top_str(&vm);
    assert_eq!(joined, "v=NaN", "an interpolated NaN renders as NaN");
}
3881
#[test]
fn match_variant_on_a_match_destructures_the_payload_onto_the_stack() {
    // the program maps variant id 0 to (Shape, Circle).
    // bytecode: CONST 99 ; MAKE_ENUM_VARIANT 0 payload-count 1 ;
    // MATCH_VARIANT 0 miss-offset +0 ; HALT.
    // the scrutinee is a Circle, so MATCH_VARIANT 0 is a hit: it consumes
    // the scrutinee and leaves the payload, 99, on the stack.
    let mut code = Chunk::new();
    emit_const(&mut code, ConstValue::I64(99), 1);
    code.write_op(Opcode::MakeEnumVariant, 1);
    code.write_u16(0, 1); // variant id 0
    code.code.push(1); // payload count 1
    code.source_lines.push(1);
    code.write_op(Opcode::MatchVariant, 1);
    code.write_u16(0, 1); // the variant id to test for
    code.write_i16(0, 1); // miss offset; not taken on a hit
    code.write_op(Opcode::Halt, 1);

    let mut program = program_with(code);
    let circle = ("Shape".to_string(), "Circle".to_string());
    program.enum_variant_names.push(circle);

    let mut vm = Vm::new(program, "x".to_string());
    vm.run().expect("match_variant hit");

    // the payload is the only value left.
    let depth = vm.stack.len();
    assert_eq!(depth, 1, "only the payload remains");
    assert_eq!(top_i64(&vm), 99, "the destructured payload is 99");
}
3908
#[test]
fn match_variant_on_a_miss_leaves_the_scrutinee_and_branches_by_the_offset() {
    // the bytecode builds Circle(1) and matches it against the variant id of
    // (Shape, Square) -- a miss. the scrutinee must stay on the stack and ip
    // must jump, by the i16 offset, over a CONST that is never executed.
    let mut body = Chunk::new();
    emit_const(&mut body, ConstValue::I64(1), 1); // bytes 0..2
    body.write_op(Opcode::MakeEnumVariant, 1); // byte 3
    body.write_u16(0, 1); // bytes 4..5: variant id 0 (Circle)
    body.code.push(1); // byte 6: payload count
    body.source_lines.push(1);
    // MATCH_VARIANT sits at byte 7 with operands in bytes 8..11, so the
    // fall-through address is 12.
    body.write_op(Opcode::MatchVariant, 1); // byte 7
    body.write_u16(1, 1); // bytes 8..9: variant id 1 (Square) -- a miss
    body.write_i16(3, 1); // bytes 10..11: miss offset +3 -> target 15
    emit_const(&mut body, ConstValue::I64(777), 1); // bytes 12..14 (skipped)
    body.write_op(Opcode::Halt, 1); // byte 15

    let mut program = program_with(body);
    for variant in ["Circle", "Square"] {
        program
            .enum_variant_names
            .push(("Shape".to_string(), variant.to_string()));
    }

    let mut vm = Vm::new(program, "x".to_string());
    vm.run().expect("match_variant miss");

    // the miss kept the scrutinee and jumped over CONST 777, so the stack
    // holds one value: the scrutinee pointer.
    assert_eq!(
        vm.stack.len(),
        1,
        "the scrutinee stays, the CONST was skipped"
    );
    let scrutinee = *vm.stack.last().expect("the scrutinee");
    let slot = scrutinee
        .as_pointer()
        .expect("the scrutinee is an enum pointer");
    let still_an_enum = matches!(vm.heap.get(slot), Some(HeapObject::EnumVariant { .. }));
    assert!(
        still_an_enum,
        "the value left on the stack is the original scrutinee"
    );
}
3947
#[test]
fn match_variant_of_a_non_enum_scrutinee_is_a_runtime_error_not_a_panic() {
    // the scrutinee here is a plain i64, not an enum value; MATCH_VARIANT
    // must report that as a Runtime error.
    let mut body = Chunk::new();
    emit_const(&mut body, ConstValue::I64(5), 1);
    body.write_op(Opcode::MatchVariant, 1);
    body.write_u16(0, 1);
    body.write_i16(0, 1);
    body.write_op(Opcode::Halt, 1);

    let mut program = program_with(body);
    program
        .enum_variant_names
        .push(("E".to_string(), "V".to_string()));

    let mut vm = Vm::new(program, "x".to_string());
    let outcome = vm.run();
    match &outcome {
        Err(QalaError::Runtime { message, .. }) => {
            assert!(message.contains("not an enum"), "got: {message}");
        }
        _ => panic!("expected a Runtime non-enum-scrutinee error, got {outcome:?}"),
    }
}
3968
#[test]
fn defer_bytecode_runs_in_lifo_order_at_scope_exit() {
    // VM-03: Phase 4 codegen splices defer bytecode inline at scope exit, in
    // REVERSE (LIFO) order. a Qala program with two defers is compiled and
    // run; the test checks two things: the VM runs the spliced defer
    // bytecode through to a clean finish (no stack imbalance from the extra
    // calls), and the two defer call sites are LIFO in the emitted bytecode.
    let src = concat!(
        "fn first() -> i64 is pure { return 1 }\n",
        "fn second() -> i64 is pure { return 2 }\n",
        "fn run() -> i64 is pure {\n",
        " defer first()\n",
        " defer second()\n",
        " return 0\n",
        "}\n",
        "fn main() -> i64 is pure { return run() }\n",
    );
    let program = compile_qala(src);

    // function ids are looked up by name in the program's fn_names table.
    // source order is first-then-second, so LIFO means the `run` chunk must
    // CALL second's id BEFORE first's id.
    let position_of = |name: &str| program.fn_names.iter().position(|n| n == name);
    let run_idx = position_of("run").expect("the run function exists");
    let code = &program.chunks[run_idx].code;

    // walk the run chunk instruction by instruction and record the fn-id
    // operand of every CALL, in byte order. the walk ends at the end of the
    // code or at the first byte that does not decode to an opcode.
    let mut call_ids: Vec<u16> = Vec::new();
    let mut cursor = 0;
    while let Some(op) = code
        .get(cursor)
        .copied()
        .and_then(|byte| Opcode::from_u8(byte))
    {
        if op == Opcode::Call {
            let operand = [code[cursor + 1], code[cursor + 2]];
            call_ids.push(u16::from_le_bytes(operand));
        }
        cursor += 1 + op.operand_bytes() as usize;
    }

    let first_id = position_of("first").unwrap() as u16;
    let second_id = position_of("second").unwrap() as u16;
    assert_eq!(
        call_ids,
        vec![second_id, first_id],
        "the run chunk must CALL second's defer before first's defer -- LIFO"
    );

    // the whole program, defer bytecode included, also runs clean in the VM.
    let mut vm = Vm::new(program, src.to_string());
    vm.run().expect("the program with two defers runs clean");
    assert_eq!(
        program_result_i64(&vm),
        0,
        "run returns 0; the defers ran for effect"
    );
}
4025
4026 // ---- get_state + single-step inspection ----
4027
#[test]
fn state_reflects_the_value_stack_and_ip_after_each_step() {
    // the program is CONST 10 ; CONST 20 ; ADD ; HALT. it is single-stepped
    // while get_state is sampled: the stack grows to two values, collapses
    // to one, and ip moves forward after every instruction.
    let mut body = Chunk::new();
    for operand in [10, 20] {
        emit_const(&mut body, ConstValue::I64(operand), 1);
    }
    body.write_op(Opcode::Add, 1);
    body.write_op(Opcode::Halt, 1);
    let mut vm = Vm::new(program_with(body), "x".to_string());

    // nothing executed yet: ip 0, chunk 0, empty stack.
    let initial = vm.get_state();
    assert_eq!(initial.ip, 0);
    assert_eq!(initial.chunk_index, 0);
    assert!(initial.stack.is_empty(), "no instruction has run yet");

    // first step runs CONST 10: ip goes 0 -> 3 and the stack holds "10".
    assert_eq!(vm.step().expect("step 1"), StepOutcome::Ran);
    let after_first_const = vm.get_state();
    assert_eq!(
        after_first_const.ip, 3,
        "CONST advanced ip by its 3 bytes"
    );
    assert_eq!(after_first_const.stack.len(), 1);
    assert_eq!(after_first_const.stack[0].rendered, "10");

    // second step runs CONST 20: ip goes 3 -> 6 and the stack holds two.
    assert_eq!(vm.step().expect("step 2"), StepOutcome::Ran);
    let after_second_const = vm.get_state();
    assert_eq!(after_second_const.ip, 6);
    assert_eq!(after_second_const.stack.len(), 2);
    assert_eq!(after_second_const.stack[1].rendered, "20");

    // third step runs ADD: ip goes 6 -> 7, both operands are replaced by
    // the single sum.
    assert_eq!(vm.step().expect("step 3"), StepOutcome::Ran);
    let after_add = vm.get_state();
    assert_eq!(after_add.ip, 7);
    assert_eq!(after_add.stack.len(), 1, "ADD popped two, pushed one");
    assert_eq!(after_add.stack[0].rendered, "30", "10 + 20 == 30");
}
4066
#[test]
fn state_current_line_tracks_the_source_map_then_is_zero_after_halt() {
    // PGUI-08: get_state reports the 1-based source line of the instruction
    // at ip, taken from the chunk's source_lines map. both the CONST and the
    // HALT are written against line 7 here, so the entire chunk maps to
    // line 7.
    const LINE: usize = 7;
    let mut body = Chunk::new();
    emit_const(&mut body, ConstValue::I64(1), 7);
    body.write_op(Opcode::Halt, 7);
    let mut vm = Vm::new(program_with(body), "x".to_string());

    // after one step ip is still inside the chunk, so the source map has an
    // entry for it and current_line is that mapped line.
    let first = vm.step().expect("step 1");
    assert_eq!(first, StepOutcome::Ran);
    let mid_run = vm.get_state();
    assert_eq!(mid_run.ip, 3, "CONST advanced ip past its 3 bytes");
    assert_eq!(
        mid_run.current_line as usize, LINE,
        "current_line is the source line the source map records for ip"
    );

    // once the program halts the frame stack is empty: the terminal
    // snapshot has no source-map entry for its ip, so current_line falls
    // back to 0 -- "no line to highlight".
    let second = vm.step().expect("step 2");
    assert_eq!(second, StepOutcome::Halted);
    let finished = vm.get_state();
    assert_eq!(
        finished.current_line as usize, 0,
        "a finished program has no current line"
    );
}
4098
#[test]
fn state_value_type_name_is_correct_for_each_primitive_kind() {
    // one constant of each primitive kind -- i64, f64, bool, str -- is
    // pushed in that order; every StateValue.type_name in the snapshot must
    // name the matching runtime type.
    let constants = [
        ConstValue::I64(1),
        ConstValue::F64(2.5),
        ConstValue::Bool(true),
        ConstValue::Str("hi".to_string()),
    ];
    let mut body = Chunk::new();
    for constant in constants {
        emit_const(&mut body, constant, 1);
    }
    body.write_op(Opcode::Halt, 1);

    let mut vm = Vm::new(program_with(body), "x".to_string());
    vm.run().expect("run");

    let snapshot = vm.get_state();
    assert_eq!(snapshot.stack.len(), 4, "four values on the stack");
    for (entry, expected) in snapshot.stack.iter().zip(["i64", "f64", "bool", "str"]) {
        assert_eq!(entry.type_name, expected);
    }
}
4118
#[test]
fn state_variables_carry_the_real_source_names_from_the_chunk() {
    // the chunk's local_names table calls slot 0 `count` and slot 1 `total`.
    // once both slots are written with SET_LOCAL, get_state must report
    // those source names rather than slotN placeholders.
    let mut body = Chunk::new();
    body.local_names = vec!["count".to_string(), "total".to_string()];
    for (slot, value) in [(0, 3), (1, 9)] {
        emit_const(&mut body, ConstValue::I64(value), 1);
        body.write_op(Opcode::SetLocal, 1);
        body.write_u16(slot, 1);
    }
    body.write_op(Opcode::Halt, 1);

    let mut vm = Vm::new(program_with(body), "x".to_string());
    vm.run().expect("run");

    let snapshot = vm.get_state();
    assert_eq!(snapshot.variables.len(), 2, "two locals are bound");

    let count = &snapshot.variables[0];
    assert_eq!(count.name, "count", "slot 0 is the real name");
    assert_eq!(count.value.rendered, "3");

    let total = &snapshot.variables[1];
    assert_eq!(total.name, "total");
    assert_eq!(total.value.rendered, "9");
}
4141
#[test]
fn state_falls_back_to_slot_index_for_an_unnamed_temporary() {
    // when local_names holds an empty string for a slot (a compiler
    // temporary), the snapshot names it `slot{i}` instead of leaving the
    // name blank.
    let mut body = Chunk::new();
    // slot 0 gets an empty name -- a hidden temporary.
    body.local_names = vec![String::new()];
    emit_const(&mut body, ConstValue::I64(7), 1);
    body.write_op(Opcode::SetLocal, 1);
    body.write_u16(0, 1);
    body.write_op(Opcode::Halt, 1);

    let mut vm = Vm::new(program_with(body), "x".to_string());
    vm.run().expect("run");

    let snapshot = vm.get_state();
    assert_eq!(snapshot.variables.len(), 1);
    let only = &snapshot.variables[0];
    assert_eq!(
        only.name, "slot0",
        "an unnamed slot falls back to slot{{i}}"
    );
}
4162
#[test]
fn state_on_a_finished_program_is_a_terminal_snapshot_not_a_panic() {
    // running a program to completion empties the frame stack (the final
    // RETURN pops main's frame). get_state must still produce a snapshot in
    // that state -- a terminal one -- without panicking or indexing out of
    // bounds.
    let src = "fn main() -> i64 is pure { return 42 }";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("the program runs clean");

    // no frame is left after run(); get_state copes with that.
    let snapshot = vm.get_state();
    assert!(
        snapshot.variables.is_empty(),
        "a finished program has no frame"
    );

    // the result value (42) is still on the value stack and shows up in
    // the snapshot.
    let result_visible = snapshot.stack.iter().any(|entry| entry.rendered == "42");
    assert!(
        result_visible,
        "the program result is visible in the terminal snapshot"
    );
}
4184
#[test]
fn get_state_output_is_deterministic_across_two_calls() {
    // the Phase 6 bridge contract: the snapshot iterates Vecs only (no
    // HashMap), so two get_state calls against the same VM state must
    // agree.
    let src = concat!(
        "fn main() -> i64 is pure {\n",
        " let a = 1\n",
        " let b = 2\n",
        " return a + b\n",
        "}\n",
    );
    let mut vm = Vm::new(compile_qala(src), src.to_string());

    // run up to four steps so the snapshot has locals and a non-empty
    // stack; stop early if the program halts first.
    for _ in 0..4 {
        let outcome = vm.step().expect("step");
        if outcome == StepOutcome::Halted {
            break;
        }
    }

    let snap_a = vm.get_state();
    let snap_b = vm.get_state();
    assert_eq!(snap_a.ip, snap_b.ip);
    assert_eq!(snap_a.chunk_index, snap_b.chunk_index);
    assert_eq!(snap_a.stack.len(), snap_b.stack.len());
    assert_eq!(snap_a.variables.len(), snap_b.variables.len());

    let paired = snap_a.variables.iter().zip(snap_b.variables.iter());
    for (left, right) in paired {
        assert_eq!(
            left.name, right.name,
            "variable order is stable across calls"
        );
        assert_eq!(left.value.rendered, right.value.rendered);
    }
}
4215
#[test]
fn step_advances_ip_by_exactly_one_full_instruction_for_every_width() {
    // the program mixes instruction widths: a 3-byte CONST, a 1-byte POP,
    // then another 3-byte CONST. every step() must move ip by exactly
    // 1 + operand_bytes() of the opcode it ran -- a step is one whole
    // multi-byte instruction, never a partial advance.
    let mut body = Chunk::new();
    emit_const(&mut body, ConstValue::I64(1), 1); // bytes 0..2
    body.write_op(Opcode::Pop, 1); // byte 3
    emit_const(&mut body, ConstValue::I64(2), 1); // bytes 4..6
    body.write_op(Opcode::Halt, 1); // byte 7
    let mut vm = Vm::new(program_with(body), "x".to_string());

    // one row per step: the step label, the opcode that step executes, and
    // the message attached to the ip-delta assertion.
    let plan = [
        (
            "step 1",
            Opcode::Const,
            "CONST advances by 1 + its operand width",
        ),
        (
            "step 2",
            Opcode::Pop,
            "POP is a zero-operand opcode -- ip advances by exactly 1",
        ),
        (
            "step 3",
            Opcode::Const,
            "the second CONST advances by 1 + its operand width",
        ),
    ];

    for (label, executed, why) in plan {
        // sample ip on both sides of the step and compare the delta with
        // the executed opcode's full width.
        let ip_before = vm.frame().expect("frame").ip;
        assert_eq!(vm.step().expect(label), StepOutcome::Ran);
        let ip_after = vm.frame().expect("frame").ip;
        assert_eq!(
            ip_after - ip_before,
            1 + executed.operand_bytes() as usize,
            "{why}"
        );
    }
}
4257
#[test]
fn vm_state_implements_serialize_for_the_wasm_bridge() {
    // a compile-time witness rather than a runtime check: the generic call
    // below only typechecks if VmState (with the StateValue / NamedValue it
    // nests) derives serde::Serialize. Phase 6's WASM bridge serializes what
    // get_state returns, so the derive belongs to the locked contract.
    fn requires_serialize<T>(_: &T)
    where
        T: serde::Serialize,
    {
    }

    let src = "fn main() -> i64 is pure { return 5 }";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("run");

    let snapshot = vm.get_state();
    requires_serialize(&snapshot);
}
4272
#[test]
fn runtime_type_name_renders_compound_types_structurally() {
    // the heap objects are allocated by hand and handed to the shared
    // helper: an array of i64 must render `[i64]`, an empty array `[]`, and
    // a tuple its element types in position order.
    let mut vm = Vm::new(program_with(Chunk::new()), "x".to_string());

    // [i64]
    let int_slot = vm.heap.alloc(HeapObject::Int(1)).expect("alloc int");
    let int_array = HeapObject::Array(vec![Value::pointer(int_slot)]);
    let arr = vm.heap.alloc(int_array).expect("alloc array");
    assert_eq!(vm.runtime_type_name(Value::pointer(arr)), "[i64]");

    // []
    let empty = vm.heap.alloc(HeapObject::Array(Vec::new())).expect("alloc");
    assert_eq!(
        vm.runtime_type_name(Value::pointer(empty)),
        "[]",
        "an empty array has no element type to show"
    );

    // (i64, str) -- tuple element types are listed positionally.
    let s = vm.heap.alloc(HeapObject::Str("k".to_string())).expect("s");
    let pair = HeapObject::Tuple(vec![Value::pointer(int_slot), Value::pointer(s)]);
    let tup = vm.heap.alloc(pair).expect("alloc tuple");
    assert_eq!(vm.runtime_type_name(Value::pointer(tup)), "(i64, str)");
}
4301
#[test]
fn runtime_type_name_depth_limit_prevents_stack_overflow() {
    // arrays are nested MAX_DISPLAY_DEPTH + 2 levels deep around an Int
    // leaf -- deeper than the display limit. runtime_type_name must return
    // (no panic) and its output must carry the depth sentinel "...".
    let mut vm = Vm::new(program_with(Chunk::new()), "x".to_string());
    let leaf = vm.heap.alloc(HeapObject::Int(1)).expect("alloc");

    // each round wraps the previous value in a fresh one-element array.
    let outermost = (0..(MAX_DISPLAY_DEPTH + 2)).fold(Value::pointer(leaf), |inner, _| {
        let slot = vm
            .heap
            .alloc(HeapObject::Array(vec![inner]))
            .expect("alloc");
        Value::pointer(slot)
    });

    let result = vm.runtime_type_name(outermost);
    assert!(
        result.contains("..."),
        "expected depth sentinel in type name, got: {result}"
    );
}
4323
4324 // ---- the REPL entry point ----
4325
4326 /// the i64 a REPL result Value decodes to -- the result is a heap pointer
4327 /// to a HeapObject::Int.
4328 fn repl_result_i64(vm: &Vm, v: Value) -> i64 {
4329 let slot = v.as_pointer().expect("a REPL i64 result is a heap pointer");
4330 match vm.heap.get(slot) {
4331 Some(HeapObject::Int(n)) => *n,
4332 _ => panic!("the REPL result pointer does not reach a heap Int"),
4333 }
4334 }
4335
#[test]
fn repl_persists_a_binding_from_one_call_to_the_next() {
    // the VM-07 success criterion: a `let` entered on one REPL call is
    // visible to an expression entered on the next call.
    let mut vm = Vm::new_repl();

    // first line: a let binding. it is a statement, so the result is void.
    let binding = vm
        .repl_eval("let x = 5")
        .expect("the let binding compiles and runs");
    assert!(binding.as_void(), "a let-statement REPL line yields void");

    // second line: an expression over the earlier binding -- x + 1 == 6.
    let sum = vm
        .repl_eval("x + 1")
        .expect("the expression sees the prior binding");
    let decoded = repl_result_i64(&vm, sum);
    assert_eq!(
        decoded, 6,
        "x (5) defined on the first call plus 1 is 6 on the second"
    );
}
4356
#[test]
fn repl_evaluates_a_standalone_expression() {
    // with no history at all, a lone expression line still yields its value.
    let mut vm = Vm::new_repl();
    let product = vm.repl_eval("2 * 21").expect("a bare expression evaluates");
    let decoded = repl_result_i64(&vm, product);
    assert_eq!(decoded, 42);
}
4364
#[test]
fn render_value_renders_an_i64_result_to_its_display_and_type_pair() {
    // render_value is the public method an external consumer (the qala
    // CLI's REPL) calls to show a result Value; it has to return the same
    // (display, type) pair as the in-crate value_to_string /
    // runtime_type_name helpers. after `let x = 5`, the line `x + 1` gives
    // the i64 6, which must render as ("6", "i64").
    let mut vm = Vm::new_repl();
    let _binding = vm
        .repl_eval("let x = 5")
        .expect("the let binding compiles and runs");
    let sum = vm
        .repl_eval("x + 1")
        .expect("the expression sees the prior binding");

    let rendered = vm.render_value(sum);
    let (display, type_name) = rendered;
    assert_eq!(display, "6", "x (5) + 1 displays as 6");
    assert_eq!(type_name, "i64", "the result is an i64");
}
4382
#[test]
fn repl_keeps_the_console_buffer_across_calls() {
    // the persistence contract: each REPL call resets the value stack, the
    // heap, and the frames, but KEEPS the console buffer, so output piles up
    // from call to call. with `call_stdlib` wired, a `println` REPL line
    // writes a real console entry and the test goes end to end.
    let mut vm = Vm::new_repl();

    // three lines in sequence: a binding, a println that writes to the
    // console through the native stdlib, and a later binding (a later call
    // rebuilds and re-runs the whole accumulated program).
    vm.repl_eval("let x = 1").expect("first call");
    vm.repl_eval("println(\"output one\")")
        .expect("println call");
    vm.repl_eval("let y = 2").expect("a later call");

    // the entry written before the last call must still be in the buffer.
    let survived = vm
        .console
        .iter()
        .map(|line| line.trim_end_matches('\n'))
        .any(|text| text == "output one");
    assert!(
        survived,
        "a repl call must not clear the console -- output accumulates"
    );
}
4405
#[test]
fn repl_a_non_compiling_line_does_not_poison_later_calls() {
    // a line that fails to compile comes back as an Err and is NOT added to
    // the history, so a LATER valid line that relies on an earlier binding
    // still works.
    let mut vm = Vm::new_repl();
    vm.repl_eval("let x = 10")
        .expect("the first binding is fine");

    // this line names something undefined -- a typecheck error. it has to
    // be rejected.
    if vm.repl_eval("let bad = no_such_name").is_ok() {
        panic!("a line with an undefined name must not compile");
    }

    // the rejected line left no trace: `x` is still usable.
    let sum = vm
        .repl_eval("x + 2")
        .expect("the earlier binding is intact after the bad line");
    let decoded = repl_result_i64(&vm, sum);
    assert_eq!(
        decoded, 12,
        "x (10) is still in scope -- the bad line did not poison the history"
    );
}
4429
#[test]
fn repl_a_parse_error_line_is_also_not_appended_to_history() {
    // the same guarantee for a line that is syntactically malformed (as
    // opposed to ill-typed): it is rejected and kept out of the history.
    let mut vm = Vm::new_repl();
    vm.repl_eval("let a = 7").expect("a valid binding");

    if vm.repl_eval("let = = =").is_ok() {
        panic!("a malformed line must not compile");
    }

    // `a` is still visible to a later valid line.
    let value = vm
        .repl_eval("a")
        .expect("the binding survives the parse error");
    let decoded = repl_result_i64(&vm, value);
    assert_eq!(decoded, 7);
}
4446
#[test]
fn repl_a_function_declaration_line_is_callable_on_a_later_call() {
    // a top-level item (here a `fn`) is placed beside the synthetic entry
    // function as a sibling, not inside it; a later REPL line can call it.
    let mut vm = Vm::new_repl();

    let declared = vm
        .repl_eval("fn triple(n: i64) -> i64 is pure { return n * 3 }")
        .expect("a fn declaration compiles");
    assert!(
        declared.as_void(),
        "an item-declaration REPL line yields void"
    );

    let called = vm
        .repl_eval("triple(14)")
        .expect("the declared function is callable later");
    let decoded = repl_result_i64(&vm, called);
    assert_eq!(decoded, 42, "triple(14) is 42");
}
4461
#[test]
fn repl_call_expression_is_classified_as_expression_not_item() {
    // under a lenient parser a call expression such as `triple(14)` also
    // parses as a valid one-item top-level program. the expression probe
    // therefore has to run first, so the line is classified as an
    // Expression (value captured and returned) and not as an Item (placed
    // outside __repl_main, value discarded as void).
    let mut vm = Vm::new_repl();

    // the function must exist before the call can typecheck.
    let _declared = vm
        .repl_eval("fn triple(n: i64) -> i64 is pure { return n * 3 }")
        .expect("fn declaration compiles");

    // the call has to come back as 42. a void result would mean the line
    // was misclassified as an Item and placed outside __repl_main.
    let called = vm
        .repl_eval("triple(14)")
        .expect("the call expression evaluates");
    let decoded = repl_result_i64(&vm, called);
    assert_eq!(
        decoded, 42,
        "a call expression is an Expression -- its value must be returned, not void"
    );
}
4484
#[test]
fn repl_new_repl_starts_blank() {
    // a freshly built REPL VM is empty in every respect checked here: no
    // compiled chunks, no call frames, no accumulated history, no console
    // output. it is waiting for its first line.
    let repl = Vm::new_repl();
    let chunks = &repl.program.chunks;
    assert!(chunks.is_empty(), "no program yet");
    assert!(
        repl.frames.is_empty(),
        "no frame until the first repl call"
    );
    assert!(
        repl.repl_history.is_empty(),
        "an empty accumulated history"
    );
    assert!(repl.console.is_empty(), "an empty console");
}
4495
4496 // ---- the stdlib seam + the file-handle leak check ----
4497
#[test]
fn stdlib_call_runs_a_native_function_end_to_end() {
    // a compiled Qala program calls the stdlib function `println`, which
    // writes its argument to the console. the full path is covered: codegen
    // emits a CALL carrying a stdlib fn-id, op_call hands it to call_stdlib,
    // and call_stdlib dispatches to crate::stdlib::println.
    let src = "fn main() is io { println(\"hi from qala\") }\n";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("a program that calls println runs clean");

    let printed = vm
        .console
        .iter()
        .map(|line| line.trim_end_matches('\n'))
        .any(|text| text == "hi from qala");
    assert!(
        printed,
        "println wrote its argument to the console, got: {:?}",
        vm.console
    );
}
4516
#[test]
fn stdlib_len_call_returns_the_collection_length() {
    // another stdlib path: `len` applied to a three-element array literal.
    // the program returns that length and the decoded result must be 3.
    let src = "fn main() -> i64 is pure { return len([10, 20, 30]) }\n";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("a program that calls len runs clean");

    let length = program_result_i64(&vm);
    assert_eq!(length, 3, "len([10,20,30]) is 3");
}
4527
#[test]
fn leak_detected_for_a_file_handle_dropped_without_close() {
    // the program opens a file handle and never closes it. when `main`
    // returns, the local `f` leaves scope still open, and the leak check
    // must record that -- the leak log ends up non-empty.
    let src = "fn main() is io {\n let f = open(\"data.txt\")\n}\n";
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run()
        .expect("the program itself runs clean -- a leak is not an error");

    let logged_anything = !vm.leak_log.is_empty();
    assert!(
        logged_anything,
        "an open handle dropped without close must be logged as a leak"
    );

    let names_the_path = vm.leak_log.iter().any(|entry| entry.contains("data.txt"));
    assert!(
        names_the_path,
        "the leak message names the leaked handle's path, got: {:?}",
        vm.leak_log
    );
}
4548
#[test]
fn no_leak_when_a_file_handle_is_closed_with_defer() {
    // the no-leak counterpart: `defer close(f)` closes the handle before the
    // function returns (codegen splices the close bytecode at the return's
    // scope exit), so the handle is already closed when the leak check runs
    // on that frame and the leak log stays empty.
    //
    // the defer is deliberately placed in a function ending in an explicit
    // `return`: a fall-through `defer` (a block with no terminator) hits a
    // separate, pre-existing codegen scope-ordering bug recorded in
    // .planning/phases/05-bytecode-vm-stdlib/deferred-items.md -- out of
    // scope for this plan. the bundled `defer-demo.qala` also ends in
    // `return` and is unaffected.
    let src = concat!(
        "fn use_handle() -> i64 is io {\n",
        " let f = open(\"data.txt\")\n",
        " defer close(f)\n",
        " return 0\n}\n",
        "fn main() is io {\n let n = use_handle()\n}\n",
    );
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("the program runs clean");

    let log_is_clean = vm.leak_log.is_empty();
    assert!(
        log_is_clean,
        "a handle closed via defer must not be logged as a leak, got: {:?}",
        vm.leak_log
    );
}
4576
#[test]
fn no_leak_when_a_file_handle_is_closed_explicitly() {
    // here the handle is closed with a direct close(f) call and no defer. it
    // is closed before main returns, so the leak check has nothing to log.
    let src = concat!(
        "fn main() is io {\n ",
        "let f = open(\"data.txt\")\n ",
        "close(f)\n}\n",
    );
    let mut vm = Vm::new(compile_qala(src), src.to_string());
    vm.run().expect("the program runs clean");

    let log_is_clean = vm.leak_log.is_empty();
    assert!(
        log_is_clean,
        "an explicitly closed handle must not leak, got: {:?}",
        vm.leak_log
    );
}
4593}