qala_compiler/opcode.rs
1//! the bytecode opcode enum: every byte the codegen emits is one of these
2//! variants encoded as `Opcode::Foo as u8`. dense discriminants 0..=45 for
3//! the real opcodes plus [`Opcode::Halt`] at `0xFF` as a sentinel for the
4//! decoder so an out-of-bounds byte is detectable rather than silently
5//! reinterpretable.
6//!
//! the layout is part of the bytecode format. adding a new opcode means
//! four places: the enum variant (with an explicit discriminant), the
//! [`Opcode::from_u8`] arm, the [`Opcode::name`] arm, and the
//! [`Opcode::operand_bytes`] arm. the last two are exhaustive matches, so
//! the compiler rejects a variant missing from either one -- even an opcode
//! that carries no operands needs its `operand_bytes` arm. the unit tests in
//! this module loop over every variant and over every byte 0..=255, so a
//! missing `from_u8` arm or a mismatched discriminant fails loudly.
13//!
14//! the disassembler ([`crate::chunk::Chunk::disassemble`], plan 04-03) reads
15//! the byte stream back via [`Opcode::from_u8`] and renders via
16//! [`Opcode::name`]; the peephole optimizer ([`crate::optimizer::peephole`],
17//! plan 04-05) uses [`Opcode::operand_bytes`] as its step function for
18//! walking past whole instructions. these three methods are the public
19//! contract this file owns.
20//!
21//! no `transmute`-based decode (anti-pattern per Phase 4 research): the
22//! match-based reverse lookup compiles to a branch table and is safe on
23//! every byte. zero-cost safety; idiomatic Rust pattern.
24
/// base of the fn-id range set aside for the native standard library.
///
/// codegen hands user functions the dense ids `0..N` (indices into
/// `Program::chunks`) and hands stdlib functions `STDLIB_FN_BASE + i`. the
/// VM's `CALL` handler compares the fn-id against this threshold: an id at
/// or above it goes to the native stdlib dispatcher, an id below it selects
/// a user chunk. `codegen.rs`, `vm.rs` and `stdlib.rs` all import the value
/// from this module, so this is the single definition to edit.
pub const STDLIB_FN_BASE: u16 = 40_000;
34
/// every instruction the bytecode format knows about.
///
/// an opcode is a single byte: the active set fills the dense discriminants
/// 0..=45, and [`Opcode::Halt`] sits apart at `0xFF` as the decoder's
/// "unknown / end-of-stream" sentinel. the type is `Copy` because codegen
/// and the peephole optimizer pass opcodes around by value -- copying one
/// byte costs less than taking a reference to it.
///
/// discriminants belong to the bytecode format, so renumbering them
/// reshapes every compiled chunk. new opcodes are appended above the current
/// high-water mark, and a freed discriminant is never handed out again.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Opcode {
    // ---- stack ----
    /// push constant-pool entry `u16` onto the stack. codegen emits it for
    /// each literal and for each constant-folded result.
    Const = 0,
    /// drop the top of the stack. appears at statement-expression
    /// boundaries and after any expression whose result goes unused.
    Pop = 1,
    /// push a copy of the top of the stack. `MATCH_VARIANT` chains rely on
    /// it so that several arms can test one scrutinee.
    Dup = 2,
    // ---- locals + globals ----
    /// push the value held in local slot `u16`. slots run 0..N within a
    /// call frame, with parameters in slots 0..argc.
    GetLocal = 3,
    /// pop the top of the stack into local slot `u16`. covers `let mut`
    /// rebinding and loop-variable updates.
    SetLocal = 4,
    /// push the value of the global at index `u16`. globals are keyed by
    /// name in the `Program.globals` table.
    GetGlobal = 5,
    /// pop the top of the stack into the global at index `u16`. Qala v1 has
    /// no top-level mutable bindings; the opcode exists for forward
    /// compatibility.
    SetGlobal = 6,
    // ---- i64 arithmetic + negation ----
    /// pop two i64 values and push their sum. only emitted when both
    /// operands are statically `i64`; the all-literal case is folded away
    /// by codegen before it gets here.
    Add = 7,
    /// pop two i64 values and push `lhs - rhs`.
    Sub = 8,
    /// pop two i64 values and push their product.
    Mul = 9,
    /// pop two i64 values and push their quotient, truncated toward zero.
    Div = 10,
    /// pop two i64 values and push their remainder, with the semantics of
    /// Rust's `%`.
    Mod = 11,
    /// pop one i64 and push its negation. the unary `-` operator on an
    /// i64-typed expression compiles to this.
    Neg = 12,
    // ---- f64 arithmetic + negation ----
    /// pop two f64 values and push their IEEE 754 sum.
    FAdd = 13,
    /// pop two f64 values and push their IEEE 754 difference.
    FSub = 14,
    /// pop two f64 values and push their IEEE 754 product.
    FMul = 15,
    /// pop two f64 values and push their IEEE 754 quotient.
    FDiv = 16,
    /// pop one f64 and push its negation (a sign-bit flip).
    FNeg = 17,
    // ---- comparisons (i64 / bool / str -- the VM picks by operand type) ----
    /// pop two values of the same type (i64, bool, or str) and push the
    /// bool `lhs == rhs`. the VM dispatches on the operand type at runtime.
    Eq = 18,
    /// pop two values of the same type and push `lhs != rhs`.
    Ne = 19,
    /// pop two values of the same type and push `lhs < rhs`.
    Lt = 20,
    /// pop two values of the same type and push `lhs <= rhs`.
    Le = 21,
    /// pop two values of the same type and push `lhs > rhs`.
    Gt = 22,
    /// pop two values of the same type and push `lhs >= rhs`.
    Ge = 23,
    // ---- f64 comparisons ----
    /// pop two f64 values and push `lhs == rhs`. IEEE 754 rules apply, so
    /// `NaN == NaN` is `false`.
    FEq = 24,
    /// pop two f64 values and push `lhs != rhs`. IEEE 754 rules apply, so
    /// `NaN != NaN` is `true`.
    FNe = 25,
    /// pop two f64 values and push `lhs < rhs` (IEEE 754).
    FLt = 26,
    /// pop two f64 values and push `lhs <= rhs` (IEEE 754).
    FLe = 27,
    /// pop two f64 values and push `lhs > rhs` (IEEE 754).
    FGt = 28,
    /// pop two f64 values and push `lhs >= rhs` (IEEE 754).
    FGe = 29,
    // ---- logic ----
    /// pop one bool and push its negation. this is the only logic opcode:
    /// short-circuiting `&&` / `||` are compiled to jump patterns instead.
    Not = 30,
    // ---- control flow ----
    /// branch by a signed `i16` offset measured from the byte AFTER the
    /// operand. a negative offset jumps backward (loops).
    Jump = 31,
    /// pop one bool; when it is `false`, branch by the signed `i16` offset
    /// measured from the byte AFTER the operand, else fall through.
    JumpIfFalse = 32,
    /// pop one bool; when it is `true`, branch by the signed `i16` offset
    /// measured from the byte AFTER the operand, else fall through. produced
    /// by the peephole rewrite `NOT; JUMP_IF_FALSE` -> `JUMP_IF_TRUE`.
    JumpIfTrue = 33,
    /// call the function at `Program.chunks[u16]` with `u8` arguments
    /// already on the stack (the top is the rightmost argument), then push
    /// the returned value -- [`crate::value::Value::Void`] for a
    /// void-returning function. fn-ids at or above [`STDLIB_FN_BASE`] are
    /// routed to the native stdlib dispatcher rather than a user chunk.
    Call = 34,
    /// leave the current call frame; the top of the stack becomes the
    /// call's result. a void-returning function pushes
    /// [`crate::value::Value::Void`] just before this opcode.
    Return = 35,
    // ---- construction + access ----
    /// pop `u16` values (the top is the last element) and push a heap array
    /// holding them in stack order.
    MakeArray = 36,
    /// pop `u16` values (the top is the last element) and push a heap tuple
    /// holding them in stack order.
    MakeTuple = 37,
    /// build a heap struct. the `u16` operand is a struct id, i.e. an index
    /// into `Program.structs` -- it is NOT a bare field count. the VM takes
    /// the field count from `Program.structs[id].field_count`, pops that
    /// many values (the top is the last field's value) and labels the
    /// struct with `Program.structs[id].name`. field order is fixed by the
    /// struct declaration and codegen emits values in declaration order, so
    /// the VM pairs them up without a per-field name index.
    MakeStruct = 38,
    /// build an enum variant value from variant id `u16` and `u8` payload
    /// values already on the stack (the top is the rightmost payload
    /// field). the VM stores the variant id and payload as a heap object.
    MakeEnumVariant = 39,
    /// pop an index (i64), then an array, and push the element at that
    /// index. an out-of-bounds index is a runtime error.
    Index = 40,
    /// pop a struct and push the field named by name-pool index `u16`. the
    /// VM resolves the name to the struct's declared field offset.
    Field = 41,
    /// pop an array (or string) and push its length as an i64.
    Len = 42,
    // ---- strings + interpolation ----
    /// pop one value and push its string form. materialises interpolated
    /// segments whose static type is not already `str`.
    ToStr = 43,
    /// pop `u16` values (in stack order, top last), concatenate them as
    /// strings and push the result. materialises string interpolation.
    ConcatN = 44,
    // ---- match dispatch ----
    /// test the top of the stack against variant id `u16`. on a match the
    /// scrutinee is consumed and its destructured payload is left on the
    /// stack; on a miss the scrutinee stays put and control branches by the
    /// signed `i16` offset (so a chain of `MATCH_VARIANT` arms keeps testing
    /// the same scrutinee via `DUP`).
    MatchVariant = 45,
    // ---- sentinel ----
    /// the `0xFF` sentinel. codegen never emits it; it serves as the
    /// disassembler's "unknown byte" marker, and the VM treats it as an
    /// error.
    Halt = 0xFF,
}
200
201impl Opcode {
202 /// safe reverse lookup: the disassembler's only entry from raw bytes back
203 /// to a typed [`Opcode`]. returns `None` for any undefined discriminant
204 /// rather than the UB that a `transmute` would risk. the match compiles
205 /// to the same branch table a `transmute`-based decode would, so the
206 /// safety is free.
207 pub fn from_u8(b: u8) -> Option<Opcode> {
208 match b {
209 0 => Some(Opcode::Const),
210 1 => Some(Opcode::Pop),
211 2 => Some(Opcode::Dup),
212 3 => Some(Opcode::GetLocal),
213 4 => Some(Opcode::SetLocal),
214 5 => Some(Opcode::GetGlobal),
215 6 => Some(Opcode::SetGlobal),
216 7 => Some(Opcode::Add),
217 8 => Some(Opcode::Sub),
218 9 => Some(Opcode::Mul),
219 10 => Some(Opcode::Div),
220 11 => Some(Opcode::Mod),
221 12 => Some(Opcode::Neg),
222 13 => Some(Opcode::FAdd),
223 14 => Some(Opcode::FSub),
224 15 => Some(Opcode::FMul),
225 16 => Some(Opcode::FDiv),
226 17 => Some(Opcode::FNeg),
227 18 => Some(Opcode::Eq),
228 19 => Some(Opcode::Ne),
229 20 => Some(Opcode::Lt),
230 21 => Some(Opcode::Le),
231 22 => Some(Opcode::Gt),
232 23 => Some(Opcode::Ge),
233 24 => Some(Opcode::FEq),
234 25 => Some(Opcode::FNe),
235 26 => Some(Opcode::FLt),
236 27 => Some(Opcode::FLe),
237 28 => Some(Opcode::FGt),
238 29 => Some(Opcode::FGe),
239 30 => Some(Opcode::Not),
240 31 => Some(Opcode::Jump),
241 32 => Some(Opcode::JumpIfFalse),
242 33 => Some(Opcode::JumpIfTrue),
243 34 => Some(Opcode::Call),
244 35 => Some(Opcode::Return),
245 36 => Some(Opcode::MakeArray),
246 37 => Some(Opcode::MakeTuple),
247 38 => Some(Opcode::MakeStruct),
248 39 => Some(Opcode::MakeEnumVariant),
249 40 => Some(Opcode::Index),
250 41 => Some(Opcode::Field),
251 42 => Some(Opcode::Len),
252 43 => Some(Opcode::ToStr),
253 44 => Some(Opcode::ConcatN),
254 45 => Some(Opcode::MatchVariant),
255 0xFF => Some(Opcode::Halt),
256 _ => None,
257 }
258 }
259
260 /// the locked uppercase identifier per [`Opcode`] used by the
261 /// disassembler and the playground bytecode panel. one line per variant;
262 /// a missing arm fails to compile. the strings are part of the public
263 /// disassembler contract.
264 pub fn name(self) -> &'static str {
265 match self {
266 Opcode::Const => "CONST",
267 Opcode::Pop => "POP",
268 Opcode::Dup => "DUP",
269 Opcode::GetLocal => "GET_LOCAL",
270 Opcode::SetLocal => "SET_LOCAL",
271 Opcode::GetGlobal => "GET_GLOBAL",
272 Opcode::SetGlobal => "SET_GLOBAL",
273 Opcode::Add => "ADD",
274 Opcode::Sub => "SUB",
275 Opcode::Mul => "MUL",
276 Opcode::Div => "DIV",
277 Opcode::Mod => "MOD",
278 Opcode::Neg => "NEG",
279 Opcode::FAdd => "F_ADD",
280 Opcode::FSub => "F_SUB",
281 Opcode::FMul => "F_MUL",
282 Opcode::FDiv => "F_DIV",
283 Opcode::FNeg => "F_NEG",
284 Opcode::Eq => "EQ",
285 Opcode::Ne => "NE",
286 Opcode::Lt => "LT",
287 Opcode::Le => "LE",
288 Opcode::Gt => "GT",
289 Opcode::Ge => "GE",
290 Opcode::FEq => "F_EQ",
291 Opcode::FNe => "F_NE",
292 Opcode::FLt => "F_LT",
293 Opcode::FLe => "F_LE",
294 Opcode::FGt => "F_GT",
295 Opcode::FGe => "F_GE",
296 Opcode::Not => "NOT",
297 Opcode::Jump => "JUMP",
298 Opcode::JumpIfFalse => "JUMP_IF_FALSE",
299 Opcode::JumpIfTrue => "JUMP_IF_TRUE",
300 Opcode::Call => "CALL",
301 Opcode::Return => "RETURN",
302 Opcode::MakeArray => "MAKE_ARRAY",
303 Opcode::MakeTuple => "MAKE_TUPLE",
304 Opcode::MakeStruct => "MAKE_STRUCT",
305 Opcode::MakeEnumVariant => "MAKE_ENUM_VARIANT",
306 Opcode::Index => "INDEX",
307 Opcode::Field => "FIELD",
308 Opcode::Len => "LEN",
309 Opcode::ToStr => "TO_STR",
310 Opcode::ConcatN => "CONCAT_N",
311 Opcode::MatchVariant => "MATCH_VARIANT",
312 Opcode::Halt => "HALT",
313 }
314 }
315
316 /// the number of operand bytes following this opcode in the instruction
317 /// stream. used by the peephole optimizer's instruction-step function
318 /// and by [`crate::chunk::Chunk::disassemble`] to skip past operand
319 /// bytes when walking the byte stream. variants reading no operand
320 /// return 0; variants reading a `u16` return 2; variants reading a
321 /// `u16 + u8` return 3; variants reading a `u16 + i16` return 4. v1 has
322 /// no wider operand layouts.
323 pub fn operand_bytes(self) -> u8 {
324 match self {
325 // two-byte operands (u16 index or i16 offset)
326 Opcode::Const
327 | Opcode::GetLocal
328 | Opcode::SetLocal
329 | Opcode::GetGlobal
330 | Opcode::SetGlobal
331 | Opcode::Jump
332 | Opcode::JumpIfFalse
333 | Opcode::JumpIfTrue
334 | Opcode::MakeArray
335 | Opcode::MakeTuple
336 | Opcode::MakeStruct
337 | Opcode::Field
338 | Opcode::ConcatN => 2,
339 // u16 + u8 (3 bytes total)
340 Opcode::Call | Opcode::MakeEnumVariant => 3,
341 // u16 + i16 (4 bytes total)
342 Opcode::MatchVariant => 4,
343 // zero-operand: stack ops, arithmetic, comparison, logic, return, halt
344 Opcode::Pop
345 | Opcode::Dup
346 | Opcode::Add
347 | Opcode::Sub
348 | Opcode::Mul
349 | Opcode::Div
350 | Opcode::Mod
351 | Opcode::Neg
352 | Opcode::FAdd
353 | Opcode::FSub
354 | Opcode::FMul
355 | Opcode::FDiv
356 | Opcode::FNeg
357 | Opcode::Eq
358 | Opcode::Ne
359 | Opcode::Lt
360 | Opcode::Le
361 | Opcode::Gt
362 | Opcode::Ge
363 | Opcode::FEq
364 | Opcode::FNe
365 | Opcode::FLt
366 | Opcode::FLe
367 | Opcode::FGt
368 | Opcode::FGe
369 | Opcode::Not
370 | Opcode::Return
371 | Opcode::Index
372 | Opcode::Len
373 | Opcode::ToStr
374 | Opcode::Halt => 0,
375 }
376 }
377}
378
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// every variant, in discriminant order with the [`Opcode::Halt`]
    /// sentinel last. the list is hand-maintained, so a new enum variant
    /// must be added here too. two checks keep it honest: the byte sweep
    /// rejects a variant that `from_u8` decodes but that is missing from
    /// this list, and the round-trip test rejects a listed variant that
    /// `from_u8` cannot decode.
    const ALL: &[Opcode] = &[
        Opcode::Const,
        Opcode::Pop,
        Opcode::Dup,
        Opcode::GetLocal,
        Opcode::SetLocal,
        Opcode::GetGlobal,
        Opcode::SetGlobal,
        Opcode::Add,
        Opcode::Sub,
        Opcode::Mul,
        Opcode::Div,
        Opcode::Mod,
        Opcode::Neg,
        Opcode::FAdd,
        Opcode::FSub,
        Opcode::FMul,
        Opcode::FDiv,
        Opcode::FNeg,
        Opcode::Eq,
        Opcode::Ne,
        Opcode::Lt,
        Opcode::Le,
        Opcode::Gt,
        Opcode::Ge,
        Opcode::FEq,
        Opcode::FNe,
        Opcode::FLt,
        Opcode::FLe,
        Opcode::FGt,
        Opcode::FGe,
        Opcode::Not,
        Opcode::Jump,
        Opcode::JumpIfFalse,
        Opcode::JumpIfTrue,
        Opcode::Call,
        Opcode::Return,
        Opcode::MakeArray,
        Opcode::MakeTuple,
        Opcode::MakeStruct,
        Opcode::MakeEnumVariant,
        Opcode::Index,
        Opcode::Field,
        Opcode::Len,
        Opcode::ToStr,
        Opcode::ConcatN,
        Opcode::MatchVariant,
        Opcode::Halt,
    ];

    /// the locked zero-operand cluster: every variant in this list must
    /// return `operand_bytes() == 0`. any new opcode that should be
    /// zero-operand belongs here and any group migration is intentional.
    const ZERO_OPERAND: &[Opcode] = &[
        Opcode::Pop,
        Opcode::Dup,
        Opcode::Add,
        Opcode::Sub,
        Opcode::Mul,
        Opcode::Div,
        Opcode::Mod,
        Opcode::Neg,
        Opcode::FAdd,
        Opcode::FSub,
        Opcode::FMul,
        Opcode::FDiv,
        Opcode::FNeg,
        Opcode::Eq,
        Opcode::Ne,
        Opcode::Lt,
        Opcode::Le,
        Opcode::Gt,
        Opcode::Ge,
        Opcode::FEq,
        Opcode::FNe,
        Opcode::FLt,
        Opcode::FLe,
        Opcode::FGt,
        Opcode::FGe,
        Opcode::Not,
        Opcode::Return,
        Opcode::Index,
        Opcode::Len,
        Opcode::ToStr,
        Opcode::Halt,
    ];

    /// the locked two-operand cluster: every variant in this list must
    /// return `operand_bytes() == 2`. covers u16 indices and i16 offsets.
    const TWO_OPERAND: &[Opcode] = &[
        Opcode::Const,
        Opcode::GetLocal,
        Opcode::SetLocal,
        Opcode::GetGlobal,
        Opcode::SetGlobal,
        Opcode::Jump,
        Opcode::JumpIfFalse,
        Opcode::JumpIfTrue,
        Opcode::MakeArray,
        Opcode::MakeTuple,
        Opcode::MakeStruct,
        Opcode::Field,
        Opcode::ConcatN,
    ];

    /// the locked three-operand cluster: u16 + u8 layouts.
    const THREE_OPERAND: &[Opcode] = &[Opcode::Call, Opcode::MakeEnumVariant];

    /// the locked four-operand cluster: u16 + i16 layouts. only
    /// [`Opcode::MatchVariant`] in v1.
    const FOUR_OPERAND: &[Opcode] = &[Opcode::MatchVariant];

    #[test]
    fn opening_discriminants_are_dense_from_zero() {
        // the byte layout is part of the locked bytecode format; pin the
        // first three discriminants by hand so a renumbering of the enum is
        // caught here rather than silently in the disassembler.
        assert_eq!(Opcode::Const as u8, 0);
        assert_eq!(Opcode::Pop as u8, 1);
        assert_eq!(Opcode::Dup as u8, 2);

        // then pin the rest of the active set: everything before the
        // trailing sentinel must occupy consecutive bytes starting at 0.
        let (sentinel, dense) = ALL.split_last().expect("ALL is never empty");
        assert_eq!(*sentinel, Opcode::Halt, "ALL must end with the sentinel");
        for (expected, op) in dense.iter().enumerate() {
            assert_eq!(
                usize::from(*op as u8),
                expected,
                "{op:?} breaks the dense discriminant run",
            );
        }
    }

    #[test]
    fn halt_uses_the_sentinel_discriminant() {
        // the sentinel marker -- never emitted by codegen but the locked
        // "unknown byte" output for the disassembler.
        assert_eq!(Opcode::Halt as u8, 0xFF);
    }

    #[test]
    fn from_u8_round_trips_every_variant() {
        // for every listed opcode, encoding to a byte and decoding back
        // yields the same variant.
        assert_eq!(ALL.len(), 47, "ALL must list all 47 opcodes");
        for op in ALL {
            assert_eq!(
                Opcode::from_u8(*op as u8),
                Some(*op),
                "round-trip failed for {op:?}",
            );
        }
    }

    #[test]
    fn from_u8_returns_none_for_every_undefined_discriminant() {
        // sweep every byte 0..=255. for defined discriminants the decoded
        // variant's `as u8` must match the input and the variant must be
        // listed in ALL; for undefined ones the result must be `None`. this
        // catches missing arms, arms that decode to the wrong variant, and
        // an ALL list that has fallen behind the enum.
        for b in 0u8..=255 {
            match Opcode::from_u8(b) {
                Some(op) => {
                    assert_eq!(
                        op as u8, b,
                        "from_u8({b}) returned the wrong variant {op:?}",
                    );
                    assert!(
                        ALL.contains(&op),
                        "{op:?} decodes from byte {b} but is missing from ALL",
                    );
                }
                None => {
                    // the undefined set is exactly the bytes not present
                    // in the active discriminant set.
                    let defined = ALL.iter().any(|op| *op as u8 == b);
                    assert!(!defined, "byte {b} is defined but from_u8 returned None");
                }
            }
        }
        // spot-check the gap between the dense set and the sentinel.
        assert_eq!(Opcode::from_u8(46), None);
        assert_eq!(Opcode::from_u8(100), None);
        assert_eq!(Opcode::from_u8(200), None);
        assert_eq!(Opcode::from_u8(254), None);
    }

    #[test]
    fn name_returns_the_locked_uppercase_string_per_variant() {
        let cases: &[(Opcode, &str)] = &[
            (Opcode::Const, "CONST"),
            (Opcode::Pop, "POP"),
            (Opcode::Dup, "DUP"),
            (Opcode::GetLocal, "GET_LOCAL"),
            (Opcode::SetLocal, "SET_LOCAL"),
            (Opcode::GetGlobal, "GET_GLOBAL"),
            (Opcode::SetGlobal, "SET_GLOBAL"),
            (Opcode::Add, "ADD"),
            (Opcode::Sub, "SUB"),
            (Opcode::Mul, "MUL"),
            (Opcode::Div, "DIV"),
            (Opcode::Mod, "MOD"),
            (Opcode::Neg, "NEG"),
            (Opcode::FAdd, "F_ADD"),
            (Opcode::FSub, "F_SUB"),
            (Opcode::FMul, "F_MUL"),
            (Opcode::FDiv, "F_DIV"),
            (Opcode::FNeg, "F_NEG"),
            (Opcode::Eq, "EQ"),
            (Opcode::Ne, "NE"),
            (Opcode::Lt, "LT"),
            (Opcode::Le, "LE"),
            (Opcode::Gt, "GT"),
            (Opcode::Ge, "GE"),
            (Opcode::FEq, "F_EQ"),
            (Opcode::FNe, "F_NE"),
            (Opcode::FLt, "F_LT"),
            (Opcode::FLe, "F_LE"),
            (Opcode::FGt, "F_GT"),
            (Opcode::FGe, "F_GE"),
            (Opcode::Not, "NOT"),
            (Opcode::Jump, "JUMP"),
            (Opcode::JumpIfFalse, "JUMP_IF_FALSE"),
            (Opcode::JumpIfTrue, "JUMP_IF_TRUE"),
            (Opcode::Call, "CALL"),
            (Opcode::Return, "RETURN"),
            (Opcode::MakeArray, "MAKE_ARRAY"),
            (Opcode::MakeTuple, "MAKE_TUPLE"),
            (Opcode::MakeStruct, "MAKE_STRUCT"),
            (Opcode::MakeEnumVariant, "MAKE_ENUM_VARIANT"),
            (Opcode::Index, "INDEX"),
            (Opcode::Field, "FIELD"),
            (Opcode::Len, "LEN"),
            (Opcode::ToStr, "TO_STR"),
            (Opcode::ConcatN, "CONCAT_N"),
            (Opcode::MatchVariant, "MATCH_VARIANT"),
            (Opcode::Halt, "HALT"),
        ];
        assert_eq!(cases.len(), ALL.len(), "name table missing a variant");
        for (op, expected) in cases {
            assert_eq!(op.name(), *expected, "wrong name for {op:?}");
            assert!(!op.name().is_empty(), "name() returned empty for {op:?}");
        }
    }

    #[test]
    fn name_is_unique_per_variant() {
        // collect every variant's name into a sorted set; the set's len
        // must equal ALL.len(), or two variants accidentally share a name.
        // a copy-paste bug here would corrupt disassembler output silently.
        let names: BTreeSet<&'static str> = ALL.iter().map(|op| op.name()).collect();
        assert_eq!(
            names.len(),
            ALL.len(),
            "duplicate names found: {names:?} (expected {} unique)",
            ALL.len(),
        );
    }

    #[test]
    fn operand_bytes_matches_the_locked_table_per_variant() {
        let cases: &[(Opcode, u8)] = &[
            (Opcode::Const, 2),
            (Opcode::Pop, 0),
            (Opcode::Dup, 0),
            (Opcode::GetLocal, 2),
            (Opcode::SetLocal, 2),
            (Opcode::GetGlobal, 2),
            (Opcode::SetGlobal, 2),
            (Opcode::Add, 0),
            (Opcode::Sub, 0),
            (Opcode::Mul, 0),
            (Opcode::Div, 0),
            (Opcode::Mod, 0),
            (Opcode::Neg, 0),
            (Opcode::FAdd, 0),
            (Opcode::FSub, 0),
            (Opcode::FMul, 0),
            (Opcode::FDiv, 0),
            (Opcode::FNeg, 0),
            (Opcode::Eq, 0),
            (Opcode::Ne, 0),
            (Opcode::Lt, 0),
            (Opcode::Le, 0),
            (Opcode::Gt, 0),
            (Opcode::Ge, 0),
            (Opcode::FEq, 0),
            (Opcode::FNe, 0),
            (Opcode::FLt, 0),
            (Opcode::FLe, 0),
            (Opcode::FGt, 0),
            (Opcode::FGe, 0),
            (Opcode::Not, 0),
            (Opcode::Jump, 2),
            (Opcode::JumpIfFalse, 2),
            (Opcode::JumpIfTrue, 2),
            (Opcode::Call, 3),
            (Opcode::Return, 0),
            (Opcode::MakeArray, 2),
            (Opcode::MakeTuple, 2),
            (Opcode::MakeStruct, 2),
            (Opcode::MakeEnumVariant, 3),
            (Opcode::Index, 0),
            (Opcode::Field, 2),
            (Opcode::Len, 0),
            (Opcode::ToStr, 0),
            (Opcode::ConcatN, 2),
            (Opcode::MatchVariant, 4),
            (Opcode::Halt, 0),
        ];
        assert_eq!(
            cases.len(),
            ALL.len(),
            "operand_bytes table missing a variant"
        );
        for (op, expected) in cases {
            assert_eq!(
                op.operand_bytes(),
                *expected,
                "wrong operand_bytes for {op:?}",
            );
        }
    }

    #[test]
    fn operand_bytes_is_bounded_by_four_across_every_variant() {
        // v1 has no opcode with more than 4 trailing bytes. a future opcode
        // wider than that must lift this assertion and the disassembler's
        // operand-reading paths in lockstep.
        for op in ALL {
            assert!(
                op.operand_bytes() <= 4,
                "{op:?} reports operand_bytes={} > 4",
                op.operand_bytes(),
            );
        }
    }

    #[test]
    fn operand_width_groups_partition_the_variant_set() {
        // each locked cluster paired with the width all its members must
        // report.
        let groups: [(&[Opcode], u8); 4] = [
            (ZERO_OPERAND, 0),
            (TWO_OPERAND, 2),
            (THREE_OPERAND, 3),
            (FOUR_OPERAND, 4),
        ];
        let mut seen: BTreeSet<u8> = BTreeSet::new();
        let mut total = 0;
        for (group, width) in groups {
            total += group.len();
            for op in group {
                assert_eq!(
                    op.operand_bytes(),
                    width,
                    "{op:?} listed in the {width}-byte group but reports {}",
                    op.operand_bytes(),
                );
                // the groups are disjoint: no opcode may appear twice.
                assert!(
                    seen.insert(*op as u8),
                    "{op:?} appears in more than one operand-width group",
                );
            }
        }
        // the groups partition the variant set: every opcode belongs to
        // exactly one group and the union covers ALL.
        assert_eq!(
            total,
            ALL.len(),
            "operand-width groups do not cover every variant",
        );
        assert_eq!(seen.len(), ALL.len());
        for op in ALL {
            assert!(
                seen.contains(&(*op as u8)),
                "{op:?} is missing from every operand-width group",
            );
        }
    }
}