Skip to main content

qala_compiler/
opcode.rs

1//! the bytecode opcode enum: every byte the codegen emits is one of these
2//! variants encoded as `Opcode::Foo as u8`. dense discriminants 0..=45 for
3//! the real opcodes plus [`Opcode::Halt`] at `0xFF` as a sentinel for the
4//! decoder so an out-of-bounds byte is detectable rather than silently
5//! reinterpretable.
6//!
//! the layout is part of the bytecode format. adding a new opcode means
//! four places: the enum variant (with an explicit discriminant), the
//! [`Opcode::from_u8`] arm, the [`Opcode::name`] arm and the
//! [`Opcode::operand_bytes`] arm. the latter two are exhaustive matches, so
//! a missing arm there fails to compile. the unit tests in this module loop
//! over every listed variant and over every byte 0..=255, so a missing
//! `from_u8` arm or a wrong discriminant fails loudly.
13//!
14//! the disassembler ([`crate::chunk::Chunk::disassemble`], plan 04-03) reads
15//! the byte stream back via [`Opcode::from_u8`] and renders via
16//! [`Opcode::name`]; the peephole optimizer ([`crate::optimizer::peephole`],
17//! plan 04-05) uses [`Opcode::operand_bytes`] as its step function for
18//! walking past whole instructions. these three methods are the public
19//! contract this file owns.
20//!
21//! no `transmute`-based decode (anti-pattern per Phase 4 research): the
22//! match-based reverse lookup compiles to a branch table and is safe on
23//! every byte. zero-cost safety; idiomatic Rust pattern.
24
/// fn-id threshold at which the native standard library's id space begins.
///
/// user-defined functions receive dense ids `0..N` from codegen (indexing
/// `Program::chunks`), while stdlib functions are numbered
/// `STDLIB_FN_BASE + i`. the VM's `CALL` handler compares the fn-id against
/// this value: an id at or above it is routed to the native stdlib
/// dispatcher, an id below it resolves to a user chunk. `codegen.rs`,
/// `vm.rs` and `stdlib.rs` all import the constant from this module, which
/// keeps it the single definition to edit.
pub const STDLIB_FN_BASE: u16 = 40_000;
34
/// every instruction the bytecode format knows, one byte each.
///
/// the active opcodes occupy the dense discriminant range 0..=45;
/// [`Opcode::Halt`] sits apart at `0xFF` and serves the decoder as its
/// "unknown / end-of-stream" sentinel. the type is `Copy` because codegen
/// and the peephole optimizer hand opcodes around by value, and copying a
/// single byte is cheaper than referencing it.
///
/// discriminants belong to the bytecode format: renumbering one reshapes
/// every compiled chunk. new opcodes are appended above the current
/// high-water mark, and a discriminant that has been freed is never reused.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Opcode {
    // ---- stack ----
    /// load constant pool entry `u16` and push it. codegen emits this for
    /// each literal and for each constant-folded result.
    Const = 0,
    /// drop the top-of-stack value. appears at statement-expression
    /// boundaries and after any expression whose result goes unused.
    Pop = 1,
    /// push a second copy of the top-of-stack value. `MATCH_VARIANT` chains
    /// rely on it so that several arms can test one scrutinee.
    Dup = 2,

    // ---- locals + globals ----
    /// push the value held in local slot `u16`. each call frame numbers its
    /// slots 0..N, with parameters in slots 0..argc.
    GetLocal = 3,
    /// pop the top value into local slot `u16`. serves `let mut` rebinding
    /// and loop-variable updates.
    SetLocal = 4,
    /// push the global variable at index `u16`. the `Program.globals` table
    /// keys globals by name.
    GetGlobal = 5,
    /// pop the top value into the global at index `u16`. reserved for
    /// forward compatibility: Qala v1 has no top-level mutable bindings.
    SetGlobal = 6,

    // ---- i64 arithmetic + negation ----
    /// pop two i64 values and push their sum. only emitted when both
    /// operands are statically `i64`-typed; the all-literal case is
    /// intercepted by constant folding at codegen time.
    Add = 7,
    /// pop two i64 values and push the difference (lhs - rhs).
    Sub = 8,
    /// pop two i64 values and push the product.
    Mul = 9,
    /// pop two i64 values and push the quotient, truncated toward zero.
    Div = 10,
    /// pop two i64 values and push the remainder (Rust's `%` semantics).
    Mod = 11,
    /// pop one i64 and push its negation. this is what the unary `-`
    /// operator emits for i64-typed expressions.
    Neg = 12,

    // ---- f64 arithmetic + negation ----
    /// pop two f64 values and push the IEEE 754 sum.
    FAdd = 13,
    /// pop two f64 values and push the IEEE 754 difference.
    FSub = 14,
    /// pop two f64 values and push the IEEE 754 product.
    FMul = 15,
    /// pop two f64 values and push the IEEE 754 quotient.
    FDiv = 16,
    /// pop one f64 and push its negation (a sign-bit flip).
    FNeg = 17,

    // ---- comparisons (i64 / bool / str -- the VM picks by operand type) ----
    /// pop two same-typed values (i64, bool, or str) and push the bool
    /// `lhs == rhs`. the VM selects the comparison by operand type at
    /// runtime.
    Eq = 18,
    /// pop two same-typed values and push `lhs != rhs`.
    Ne = 19,
    /// pop two same-typed values and push `lhs < rhs`.
    Lt = 20,
    /// pop two same-typed values and push `lhs <= rhs`.
    Le = 21,
    /// pop two same-typed values and push `lhs > rhs`.
    Gt = 22,
    /// pop two same-typed values and push `lhs >= rhs`.
    Ge = 23,

    // ---- f64 comparisons ----
    /// pop two f64 values and push `lhs == rhs`. IEEE 754 applies, so
    /// `NaN == NaN` yields `false`.
    FEq = 24,
    /// pop two f64 values and push `lhs != rhs`. IEEE 754 applies, so
    /// `NaN != NaN` yields `true`.
    FNe = 25,
    /// pop two f64 values and push `lhs < rhs` (IEEE 754).
    FLt = 26,
    /// pop two f64 values and push `lhs <= rhs` (IEEE 754).
    FLe = 27,
    /// pop two f64 values and push `lhs > rhs` (IEEE 754).
    FGt = 28,
    /// pop two f64 values and push `lhs >= rhs` (IEEE 754).
    FGe = 29,

    // ---- logic ----
    /// pop one bool and push its negation. this is the only logic opcode:
    /// short-circuiting `&&` / `||` are compiled into jump patterns.
    Not = 30,

    // ---- control flow ----
    /// unconditional branch by a signed `i16` offset, measured from the byte
    /// AFTER the operand. negative offsets are permitted and give backward
    /// jumps (loops).
    Jump = 31,
    /// pop one bool and, when it is `false`, branch by the signed `i16`
    /// offset measured from the byte AFTER the operand; fall through
    /// otherwise.
    JumpIfFalse = 32,
    /// pop one bool and, when it is `true`, branch by the signed `i16`
    /// offset measured from the byte AFTER the operand; fall through
    /// otherwise. produced by the peephole rewrite
    /// `NOT; JUMP_IF_FALSE` -> `JUMP_IF_TRUE`.
    JumpIfTrue = 33,
    /// call the function at `Program.chunks[u16]`, passing the `u8`
    /// arguments that are already on the stack (the rightmost argument is on
    /// top). the returned value is pushed, or
    /// [`crate::value::Value::Void`] for a void-returning function. see
    /// [`STDLIB_FN_BASE`] for how fn-ids at or above that threshold are
    /// routed to the native stdlib instead of a user chunk.
    Call = 34,
    /// leave the current call frame, handing the top-of-stack value back as
    /// the call's result. a void-returning function pushes
    /// [`crate::value::Value::Void`] ahead of this opcode.
    Return = 35,

    // ---- construction + access ----
    /// pop `u16` values (the last element is on top) and push a heap array
    /// holding them in stack order.
    MakeArray = 36,
    /// pop `u16` values (the last element is on top) and push a heap tuple
    /// holding them in stack order.
    MakeTuple = 37,
    /// build a heap struct. the `u16` operand is a struct id indexing
    /// `Program.structs` -- it is NOT a bare field count. the VM takes the
    /// field count from `Program.structs[id].field_count`, pops that many
    /// values (the last field's value is on top) and labels the struct with
    /// `Program.structs[id].name`. field order is fixed by the struct
    /// declaration and codegen emits values in declaration order, so the VM
    /// can pair them up without a per-field name index.
    MakeStruct = 38,
    /// build an enum variant value from a `u16` variant id followed by a
    /// `u8` count of payload values already on the stack (the rightmost
    /// payload field is on top). the VM stores the variant id and payload
    /// as a heap object.
    MakeEnumVariant = 39,
    /// pop an index (i64), then an array value, and push the array element
    /// at that index. an out-of-bounds index is a runtime error.
    Index = 40,
    /// pop a struct value and push the field named by name-pool index
    /// `u16`. the VM maps the name to the struct's declared field offset.
    Field = 41,
    /// pop an array (or string) and push its length as i64.
    Len = 42,

    // ---- strings + interpolation ----
    /// pop one value and push its string form. materialises interpolated
    /// segments whose static type is not already `str`.
    ToStr = 43,
    /// pop `u16` values (in stack order, top last), concatenate them as
    /// strings and push the result. materialises string interpolation.
    ConcatN = 44,

    // ---- match dispatch ----
    /// test the top-of-stack value against variant id `u16`. on a match the
    /// scrutinee is consumed and its destructured payload is left on the
    /// stack; on a miss the scrutinee stays put and execution branches by
    /// the signed `i16` offset (which is why a chain of `MATCH_VARIANT`
    /// arms re-tests the same scrutinee via `DUP`).
    MatchVariant = 45,

    // ---- sentinel ----
    /// the `0xFF` sentinel. codegen never emits it; the disassembler can use
    /// it as its "unknown byte" marker and the VM treats it as an error.
    Halt = 0xFF,
}
200
201impl Opcode {
202    /// safe reverse lookup: the disassembler's only entry from raw bytes back
203    /// to a typed [`Opcode`]. returns `None` for any undefined discriminant
204    /// rather than the UB that a `transmute` would risk. the match compiles
205    /// to the same branch table a `transmute`-based decode would, so the
206    /// safety is free.
207    pub fn from_u8(b: u8) -> Option<Opcode> {
208        match b {
209            0 => Some(Opcode::Const),
210            1 => Some(Opcode::Pop),
211            2 => Some(Opcode::Dup),
212            3 => Some(Opcode::GetLocal),
213            4 => Some(Opcode::SetLocal),
214            5 => Some(Opcode::GetGlobal),
215            6 => Some(Opcode::SetGlobal),
216            7 => Some(Opcode::Add),
217            8 => Some(Opcode::Sub),
218            9 => Some(Opcode::Mul),
219            10 => Some(Opcode::Div),
220            11 => Some(Opcode::Mod),
221            12 => Some(Opcode::Neg),
222            13 => Some(Opcode::FAdd),
223            14 => Some(Opcode::FSub),
224            15 => Some(Opcode::FMul),
225            16 => Some(Opcode::FDiv),
226            17 => Some(Opcode::FNeg),
227            18 => Some(Opcode::Eq),
228            19 => Some(Opcode::Ne),
229            20 => Some(Opcode::Lt),
230            21 => Some(Opcode::Le),
231            22 => Some(Opcode::Gt),
232            23 => Some(Opcode::Ge),
233            24 => Some(Opcode::FEq),
234            25 => Some(Opcode::FNe),
235            26 => Some(Opcode::FLt),
236            27 => Some(Opcode::FLe),
237            28 => Some(Opcode::FGt),
238            29 => Some(Opcode::FGe),
239            30 => Some(Opcode::Not),
240            31 => Some(Opcode::Jump),
241            32 => Some(Opcode::JumpIfFalse),
242            33 => Some(Opcode::JumpIfTrue),
243            34 => Some(Opcode::Call),
244            35 => Some(Opcode::Return),
245            36 => Some(Opcode::MakeArray),
246            37 => Some(Opcode::MakeTuple),
247            38 => Some(Opcode::MakeStruct),
248            39 => Some(Opcode::MakeEnumVariant),
249            40 => Some(Opcode::Index),
250            41 => Some(Opcode::Field),
251            42 => Some(Opcode::Len),
252            43 => Some(Opcode::ToStr),
253            44 => Some(Opcode::ConcatN),
254            45 => Some(Opcode::MatchVariant),
255            0xFF => Some(Opcode::Halt),
256            _ => None,
257        }
258    }
259
260    /// the locked uppercase identifier per [`Opcode`] used by the
261    /// disassembler and the playground bytecode panel. one line per variant;
262    /// a missing arm fails to compile. the strings are part of the public
263    /// disassembler contract.
264    pub fn name(self) -> &'static str {
265        match self {
266            Opcode::Const => "CONST",
267            Opcode::Pop => "POP",
268            Opcode::Dup => "DUP",
269            Opcode::GetLocal => "GET_LOCAL",
270            Opcode::SetLocal => "SET_LOCAL",
271            Opcode::GetGlobal => "GET_GLOBAL",
272            Opcode::SetGlobal => "SET_GLOBAL",
273            Opcode::Add => "ADD",
274            Opcode::Sub => "SUB",
275            Opcode::Mul => "MUL",
276            Opcode::Div => "DIV",
277            Opcode::Mod => "MOD",
278            Opcode::Neg => "NEG",
279            Opcode::FAdd => "F_ADD",
280            Opcode::FSub => "F_SUB",
281            Opcode::FMul => "F_MUL",
282            Opcode::FDiv => "F_DIV",
283            Opcode::FNeg => "F_NEG",
284            Opcode::Eq => "EQ",
285            Opcode::Ne => "NE",
286            Opcode::Lt => "LT",
287            Opcode::Le => "LE",
288            Opcode::Gt => "GT",
289            Opcode::Ge => "GE",
290            Opcode::FEq => "F_EQ",
291            Opcode::FNe => "F_NE",
292            Opcode::FLt => "F_LT",
293            Opcode::FLe => "F_LE",
294            Opcode::FGt => "F_GT",
295            Opcode::FGe => "F_GE",
296            Opcode::Not => "NOT",
297            Opcode::Jump => "JUMP",
298            Opcode::JumpIfFalse => "JUMP_IF_FALSE",
299            Opcode::JumpIfTrue => "JUMP_IF_TRUE",
300            Opcode::Call => "CALL",
301            Opcode::Return => "RETURN",
302            Opcode::MakeArray => "MAKE_ARRAY",
303            Opcode::MakeTuple => "MAKE_TUPLE",
304            Opcode::MakeStruct => "MAKE_STRUCT",
305            Opcode::MakeEnumVariant => "MAKE_ENUM_VARIANT",
306            Opcode::Index => "INDEX",
307            Opcode::Field => "FIELD",
308            Opcode::Len => "LEN",
309            Opcode::ToStr => "TO_STR",
310            Opcode::ConcatN => "CONCAT_N",
311            Opcode::MatchVariant => "MATCH_VARIANT",
312            Opcode::Halt => "HALT",
313        }
314    }
315
316    /// the number of operand bytes following this opcode in the instruction
317    /// stream. used by the peephole optimizer's instruction-step function
318    /// and by [`crate::chunk::Chunk::disassemble`] to skip past operand
319    /// bytes when walking the byte stream. variants reading no operand
320    /// return 0; variants reading a `u16` return 2; variants reading a
321    /// `u16 + u8` return 3; variants reading a `u16 + i16` return 4. v1 has
322    /// no wider operand layouts.
323    pub fn operand_bytes(self) -> u8 {
324        match self {
325            // two-byte operands (u16 index or i16 offset)
326            Opcode::Const
327            | Opcode::GetLocal
328            | Opcode::SetLocal
329            | Opcode::GetGlobal
330            | Opcode::SetGlobal
331            | Opcode::Jump
332            | Opcode::JumpIfFalse
333            | Opcode::JumpIfTrue
334            | Opcode::MakeArray
335            | Opcode::MakeTuple
336            | Opcode::MakeStruct
337            | Opcode::Field
338            | Opcode::ConcatN => 2,
339            // u16 + u8 (3 bytes total)
340            Opcode::Call | Opcode::MakeEnumVariant => 3,
341            // u16 + i16 (4 bytes total)
342            Opcode::MatchVariant => 4,
343            // zero-operand: stack ops, arithmetic, comparison, logic, return, halt
344            Opcode::Pop
345            | Opcode::Dup
346            | Opcode::Add
347            | Opcode::Sub
348            | Opcode::Mul
349            | Opcode::Div
350            | Opcode::Mod
351            | Opcode::Neg
352            | Opcode::FAdd
353            | Opcode::FSub
354            | Opcode::FMul
355            | Opcode::FDiv
356            | Opcode::FNeg
357            | Opcode::Eq
358            | Opcode::Ne
359            | Opcode::Lt
360            | Opcode::Le
361            | Opcode::Gt
362            | Opcode::Ge
363            | Opcode::FEq
364            | Opcode::FNe
365            | Opcode::FLt
366            | Opcode::FLe
367            | Opcode::FGt
368            | Opcode::FGe
369            | Opcode::Not
370            | Opcode::Return
371            | Opcode::Index
372            | Opcode::Len
373            | Opcode::ToStr
374            | Opcode::Halt => 0,
375        }
376    }
377}
378
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// the hand-maintained list of every variant. it cannot be derived from
    /// the enum, so it must grow in lockstep whenever a variant is added.
    /// the byte sweep in
    /// `from_u8_returns_none_for_every_undefined_discriminant` fails when
    /// `from_u8` decodes a variant that is missing from this list, and the
    /// round-trip test pins its length.
    const ALL: &[Opcode] = &[
        Opcode::Const,
        Opcode::Pop,
        Opcode::Dup,
        Opcode::GetLocal,
        Opcode::SetLocal,
        Opcode::GetGlobal,
        Opcode::SetGlobal,
        Opcode::Add,
        Opcode::Sub,
        Opcode::Mul,
        Opcode::Div,
        Opcode::Mod,
        Opcode::Neg,
        Opcode::FAdd,
        Opcode::FSub,
        Opcode::FMul,
        Opcode::FDiv,
        Opcode::FNeg,
        Opcode::Eq,
        Opcode::Ne,
        Opcode::Lt,
        Opcode::Le,
        Opcode::Gt,
        Opcode::Ge,
        Opcode::FEq,
        Opcode::FNe,
        Opcode::FLt,
        Opcode::FLe,
        Opcode::FGt,
        Opcode::FGe,
        Opcode::Not,
        Opcode::Jump,
        Opcode::JumpIfFalse,
        Opcode::JumpIfTrue,
        Opcode::Call,
        Opcode::Return,
        Opcode::MakeArray,
        Opcode::MakeTuple,
        Opcode::MakeStruct,
        Opcode::MakeEnumVariant,
        Opcode::Index,
        Opcode::Field,
        Opcode::Len,
        Opcode::ToStr,
        Opcode::ConcatN,
        Opcode::MatchVariant,
        Opcode::Halt,
    ];

    /// the locked zero-operand cluster: every variant in this list must
    /// return `operand_bytes() == 0`. any new opcode that should be
    /// zero-operand belongs here and any group migration is intentional.
    const ZERO_OPERAND: &[Opcode] = &[
        Opcode::Pop,
        Opcode::Dup,
        Opcode::Add,
        Opcode::Sub,
        Opcode::Mul,
        Opcode::Div,
        Opcode::Mod,
        Opcode::Neg,
        Opcode::FAdd,
        Opcode::FSub,
        Opcode::FMul,
        Opcode::FDiv,
        Opcode::FNeg,
        Opcode::Eq,
        Opcode::Ne,
        Opcode::Lt,
        Opcode::Le,
        Opcode::Gt,
        Opcode::Ge,
        Opcode::FEq,
        Opcode::FNe,
        Opcode::FLt,
        Opcode::FLe,
        Opcode::FGt,
        Opcode::FGe,
        Opcode::Not,
        Opcode::Return,
        Opcode::Index,
        Opcode::Len,
        Opcode::ToStr,
        Opcode::Halt,
    ];

    /// the locked two-operand cluster: every variant in this list must
    /// return `operand_bytes() == 2`. covers u16 indices and i16 offsets.
    const TWO_OPERAND: &[Opcode] = &[
        Opcode::Const,
        Opcode::GetLocal,
        Opcode::SetLocal,
        Opcode::GetGlobal,
        Opcode::SetGlobal,
        Opcode::Jump,
        Opcode::JumpIfFalse,
        Opcode::JumpIfTrue,
        Opcode::MakeArray,
        Opcode::MakeTuple,
        Opcode::MakeStruct,
        Opcode::Field,
        Opcode::ConcatN,
    ];

    /// the locked three-operand cluster: u16 + u8 layouts.
    const THREE_OPERAND: &[Opcode] = &[Opcode::Call, Opcode::MakeEnumVariant];

    /// the locked four-operand cluster: u16 + i16 layouts. only
    /// [`Opcode::MatchVariant`] in v1.
    const FOUR_OPERAND: &[Opcode] = &[Opcode::MatchVariant];

    #[test]
    fn opening_discriminants_are_dense_from_zero() {
        // the byte layout is part of the locked bytecode format; pin the
        // first three discriminants so a future renumbering of the enum is
        // caught here rather than silently in the disassembler.
        assert_eq!(Opcode::Const as u8, 0);
        assert_eq!(Opcode::Pop as u8, 1);
        assert_eq!(Opcode::Dup as u8, 2);
    }

    #[test]
    fn halt_uses_the_sentinel_discriminant() {
        // the sentinel marker -- never emitted by codegen but the locked
        // "unknown byte" output for the disassembler.
        assert_eq!(Opcode::Halt as u8, 0xFF);
    }

    #[test]
    fn from_u8_round_trips_every_variant() {
        // for every defined opcode, encoding to a byte and decoding back
        // yields the same variant.
        assert_eq!(ALL.len(), 47, "ALL must list all 47 opcodes");
        for op in ALL {
            assert_eq!(
                Opcode::from_u8(*op as u8),
                Some(*op),
                "round-trip failed for {op:?}",
            );
        }
    }

    #[test]
    fn from_u8_returns_none_for_every_undefined_discriminant() {
        // sweep every byte 0..=255. a byte decodes to `Some` exactly when
        // it is the discriminant of a variant listed in ALL, and the
        // decoded variant's `as u8` must match the input. this catches
        // missing arms, arms that decode to the wrong variant, and a
        // variant that `from_u8` knows but ALL does not.
        for b in 0u8..=255 {
            let listed = ALL.iter().any(|op| *op as u8 == b);
            match Opcode::from_u8(b) {
                Some(op) => {
                    assert_eq!(
                        op as u8, b,
                        "from_u8({b}) returned the wrong variant {op:?}",
                    );
                    // without this check ALL could silently fall behind
                    // the enum and every ALL-driven test would lose
                    // coverage of the new variant.
                    assert!(
                        listed,
                        "from_u8({b}) decoded {op:?}, which is missing from ALL",
                    );
                }
                None => {
                    // the undefined set is exactly the bytes not present
                    // in the active discriminant set.
                    assert!(!listed, "byte {b} is defined but from_u8 returned None");
                }
            }
        }
        // spot-check the gap between the dense set and the sentinel.
        assert_eq!(Opcode::from_u8(46), None);
        assert_eq!(Opcode::from_u8(100), None);
        assert_eq!(Opcode::from_u8(200), None);
        assert_eq!(Opcode::from_u8(254), None);
    }

    #[test]
    fn name_returns_the_locked_uppercase_string_per_variant() {
        let cases: &[(Opcode, &str)] = &[
            (Opcode::Const, "CONST"),
            (Opcode::Pop, "POP"),
            (Opcode::Dup, "DUP"),
            (Opcode::GetLocal, "GET_LOCAL"),
            (Opcode::SetLocal, "SET_LOCAL"),
            (Opcode::GetGlobal, "GET_GLOBAL"),
            (Opcode::SetGlobal, "SET_GLOBAL"),
            (Opcode::Add, "ADD"),
            (Opcode::Sub, "SUB"),
            (Opcode::Mul, "MUL"),
            (Opcode::Div, "DIV"),
            (Opcode::Mod, "MOD"),
            (Opcode::Neg, "NEG"),
            (Opcode::FAdd, "F_ADD"),
            (Opcode::FSub, "F_SUB"),
            (Opcode::FMul, "F_MUL"),
            (Opcode::FDiv, "F_DIV"),
            (Opcode::FNeg, "F_NEG"),
            (Opcode::Eq, "EQ"),
            (Opcode::Ne, "NE"),
            (Opcode::Lt, "LT"),
            (Opcode::Le, "LE"),
            (Opcode::Gt, "GT"),
            (Opcode::Ge, "GE"),
            (Opcode::FEq, "F_EQ"),
            (Opcode::FNe, "F_NE"),
            (Opcode::FLt, "F_LT"),
            (Opcode::FLe, "F_LE"),
            (Opcode::FGt, "F_GT"),
            (Opcode::FGe, "F_GE"),
            (Opcode::Not, "NOT"),
            (Opcode::Jump, "JUMP"),
            (Opcode::JumpIfFalse, "JUMP_IF_FALSE"),
            (Opcode::JumpIfTrue, "JUMP_IF_TRUE"),
            (Opcode::Call, "CALL"),
            (Opcode::Return, "RETURN"),
            (Opcode::MakeArray, "MAKE_ARRAY"),
            (Opcode::MakeTuple, "MAKE_TUPLE"),
            (Opcode::MakeStruct, "MAKE_STRUCT"),
            (Opcode::MakeEnumVariant, "MAKE_ENUM_VARIANT"),
            (Opcode::Index, "INDEX"),
            (Opcode::Field, "FIELD"),
            (Opcode::Len, "LEN"),
            (Opcode::ToStr, "TO_STR"),
            (Opcode::ConcatN, "CONCAT_N"),
            (Opcode::MatchVariant, "MATCH_VARIANT"),
            (Opcode::Halt, "HALT"),
        ];
        assert_eq!(cases.len(), ALL.len(), "name table missing a variant");
        for (op, expected) in cases {
            assert_eq!(op.name(), *expected, "wrong name for {op:?}");
            assert!(!op.name().is_empty(), "name() returned empty for {op:?}");
        }
    }

    #[test]
    fn name_is_unique_per_variant() {
        // collect every variant's name into a sorted set; the set's len
        // must equal ALL.len(), or two variants accidentally share a name.
        // a copy-paste bug here would corrupt disassembler output silently.
        let names: BTreeSet<&'static str> = ALL.iter().map(|op| op.name()).collect();
        assert_eq!(
            names.len(),
            ALL.len(),
            "duplicate names found: {names:?} (expected {} unique)",
            ALL.len(),
        );
    }

    #[test]
    fn operand_bytes_matches_the_locked_table_per_variant() {
        let cases: &[(Opcode, u8)] = &[
            (Opcode::Const, 2),
            (Opcode::Pop, 0),
            (Opcode::Dup, 0),
            (Opcode::GetLocal, 2),
            (Opcode::SetLocal, 2),
            (Opcode::GetGlobal, 2),
            (Opcode::SetGlobal, 2),
            (Opcode::Add, 0),
            (Opcode::Sub, 0),
            (Opcode::Mul, 0),
            (Opcode::Div, 0),
            (Opcode::Mod, 0),
            (Opcode::Neg, 0),
            (Opcode::FAdd, 0),
            (Opcode::FSub, 0),
            (Opcode::FMul, 0),
            (Opcode::FDiv, 0),
            (Opcode::FNeg, 0),
            (Opcode::Eq, 0),
            (Opcode::Ne, 0),
            (Opcode::Lt, 0),
            (Opcode::Le, 0),
            (Opcode::Gt, 0),
            (Opcode::Ge, 0),
            (Opcode::FEq, 0),
            (Opcode::FNe, 0),
            (Opcode::FLt, 0),
            (Opcode::FLe, 0),
            (Opcode::FGt, 0),
            (Opcode::FGe, 0),
            (Opcode::Not, 0),
            (Opcode::Jump, 2),
            (Opcode::JumpIfFalse, 2),
            (Opcode::JumpIfTrue, 2),
            (Opcode::Call, 3),
            (Opcode::Return, 0),
            (Opcode::MakeArray, 2),
            (Opcode::MakeTuple, 2),
            (Opcode::MakeStruct, 2),
            (Opcode::MakeEnumVariant, 3),
            (Opcode::Index, 0),
            (Opcode::Field, 2),
            (Opcode::Len, 0),
            (Opcode::ToStr, 0),
            (Opcode::ConcatN, 2),
            (Opcode::MatchVariant, 4),
            (Opcode::Halt, 0),
        ];
        assert_eq!(
            cases.len(),
            ALL.len(),
            "operand_bytes table missing a variant"
        );
        for (op, expected) in cases {
            assert_eq!(
                op.operand_bytes(),
                *expected,
                "wrong operand_bytes for {op:?}",
            );
        }
    }

    #[test]
    fn operand_bytes_is_bounded_by_four_across_every_variant() {
        // v1 has no opcode with more than 4 trailing bytes. a future opcode
        // wider than that must lift this assertion and the disassembler's
        // operand-reading paths in lockstep.
        for op in ALL {
            assert!(
                op.operand_bytes() <= 4,
                "{op:?} reports operand_bytes={} > 4",
                op.operand_bytes(),
            );
        }
    }

    #[test]
    fn operand_width_groups_partition_the_variant_set() {
        // every variant in the zero-operand cluster reports 0.
        for op in ZERO_OPERAND {
            assert_eq!(
                op.operand_bytes(),
                0,
                "{op:?} listed as zero-operand but reports {}",
                op.operand_bytes(),
            );
        }
        // every variant in the two-operand cluster reports 2.
        for op in TWO_OPERAND {
            assert_eq!(
                op.operand_bytes(),
                2,
                "{op:?} listed as two-operand but reports {}",
                op.operand_bytes(),
            );
        }
        // every variant in the three-operand cluster reports 3.
        for op in THREE_OPERAND {
            assert_eq!(
                op.operand_bytes(),
                3,
                "{op:?} listed as three-operand but reports {}",
                op.operand_bytes(),
            );
        }
        // every variant in the four-operand cluster reports 4.
        for op in FOUR_OPERAND {
            assert_eq!(
                op.operand_bytes(),
                4,
                "{op:?} listed as four-operand but reports {}",
                op.operand_bytes(),
            );
        }
        // the groups partition the variant set: every opcode belongs to
        // exactly one group, the union covers ALL.
        let total =
            ZERO_OPERAND.len() + TWO_OPERAND.len() + THREE_OPERAND.len() + FOUR_OPERAND.len();
        assert_eq!(
            total,
            ALL.len(),
            "operand-width groups do not cover every variant",
        );
        // discriminants are unique per variant, so a repeated byte means a
        // variant was listed in two groups.
        let mut seen: BTreeSet<u8> = BTreeSet::new();
        for op in ZERO_OPERAND
            .iter()
            .chain(TWO_OPERAND)
            .chain(THREE_OPERAND)
            .chain(FOUR_OPERAND)
        {
            assert!(
                seen.insert(*op as u8),
                "{op:?} appears in more than one operand-width group",
            );
        }
        assert_eq!(seen.len(), ALL.len());
    }
}