// plotnik_lib/bytecode/instructions.rs

1//! Bytecode instruction definitions.
2//!
3//! Instructions are runtime-friendly structs with `from_bytes`/`to_bytes`
4//! methods for bytecode serialization.
5
6use std::num::NonZeroU16;
7
8use super::constants::{SECTION_ALIGN, STEP_SIZE};
9use super::effects::EffectOp;
10use super::ir::NodeTypeIR;
11use super::nav::Nav;
12
/// Step address in bytecode (raw u16).
///
/// Used for layout addresses, entrypoint targets, bootstrap parameter, etc.
/// For decoded instruction successors (where 0 = terminal), use [`StepId`] instead.
pub type StepAddr = u16;

/// Successor step address in decoded instructions.
///
/// Uses NonZeroU16 because raw 0 means "terminal" (no successor).
/// This type is only for decoded instruction successors - use raw `u16`
/// for addresses in layout, entrypoints, and VM internals.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
#[repr(transparent)]
pub struct StepId(pub NonZeroU16);

impl StepId {
    /// Create a new StepId from a raw address.
    ///
    /// # Panics
    /// Panics if `n == 0`, since 0 encodes "terminal" and is not a valid id.
    #[inline]
    pub fn new(n: u16) -> Self {
        match NonZeroU16::new(n) {
            Some(nz) => Self(nz),
            None => panic!("StepId cannot be 0"),
        }
    }

    /// Get the raw u16 value.
    #[inline]
    pub fn get(self) -> u16 {
        self.0.get()
    }
}
41
/// Instruction opcodes (4-bit).
///
/// Encoded in the low nibble of the instruction's first header byte
/// (`segment(2) | node_kind(2) | opcode(4)`).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[repr(u8)]
pub enum Opcode {
    /// Compact 8-byte match with a single inline successor.
    Match8 = 0x0,
    /// Extended 16-byte match (4 payload slots).
    Match16 = 0x1,
    /// Extended 24-byte match (8 payload slots).
    Match24 = 0x2,
    /// Extended 32-byte match (12 payload slots).
    Match32 = 0x3,
    /// Extended 48-byte match (20 payload slots).
    Match48 = 0x4,
    /// Extended 64-byte match (28 payload slots).
    Match64 = 0x5,
    /// 8-byte call into a definition (recursion).
    Call = 0x6,
    /// 8-byte return from a definition.
    Return = 0x7,
    /// 8-byte universal-entry call; target supplied by VM context.
    Trampoline = 0x8,
}
56
57impl Opcode {
58    pub fn from_u8(v: u8) -> Self {
59        match v {
60            0x0 => Self::Match8,
61            0x1 => Self::Match16,
62            0x2 => Self::Match24,
63            0x3 => Self::Match32,
64            0x4 => Self::Match48,
65            0x5 => Self::Match64,
66            0x6 => Self::Call,
67            0x7 => Self::Return,
68            0x8 => Self::Trampoline,
69            _ => panic!("invalid opcode: {v}"),
70        }
71    }
72
73    /// Instruction size in bytes.
74    pub fn size(self) -> usize {
75        match self {
76            Self::Match8 => 8,
77            Self::Match16 => 16,
78            Self::Match24 => 24,
79            Self::Match32 => 32,
80            Self::Match48 => 48,
81            Self::Match64 => 64,
82            Self::Call => 8,
83            Self::Return => 8,
84            Self::Trampoline => 8,
85        }
86    }
87
88    /// Number of steps this instruction occupies.
89    pub fn step_count(self) -> u16 {
90        (self.size() / STEP_SIZE) as u16
91    }
92
93    /// Whether this is a Match variant.
94    pub fn is_match(self) -> bool {
95        matches!(
96            self,
97            Self::Match8
98                | Self::Match16
99                | Self::Match24
100                | Self::Match32
101                | Self::Match48
102                | Self::Match64
103        )
104    }
105
106    /// Whether this is an extended Match (Match16-64).
107    pub fn is_extended_match(self) -> bool {
108        matches!(
109            self,
110            Self::Match16 | Self::Match24 | Self::Match32 | Self::Match48 | Self::Match64
111        )
112    }
113
114    /// Payload capacity in u16 slots for extended Match variants.
115    pub fn payload_slots(self) -> usize {
116        match self {
117            Self::Match16 => 4,
118            Self::Match24 => 8,
119            Self::Match32 => 12,
120            Self::Match48 => 20,
121            Self::Match64 => 28,
122            _ => 0,
123        }
124    }
125}
126
/// Match instruction decoded from bytecode.
///
/// Provides iterator-based access to effects and successors without allocating.
#[derive(Clone, Copy, Debug)]
pub struct Match<'a> {
    /// Backing bytecode slice starting at this instruction; payload entries
    /// are read from it lazily by the iterator accessors.
    bytes: &'a [u8],
    /// Segment index (0-3, currently only 0 is used).
    pub segment: u8,
    /// Navigation command. `Epsilon` means no cursor movement or node check.
    pub nav: Nav,
    /// Node type constraint (Any = wildcard, Named/Anonymous for specific checks).
    pub node_type: NodeTypeIR,
    /// Field constraint (None = wildcard; stored as raw 0 in bytecode).
    pub node_field: Option<NonZeroU16>,
    /// Whether this is Match8 (no payload) or extended.
    is_match8: bool,
    /// For Match8: the single successor (0 = terminal).
    match8_next: u16,
    /// For extended: counts packed into single byte each.
    /// Number of pre-effects in the payload.
    pre_count: u8,
    /// Number of negated-field entries in the payload.
    neg_count: u8,
    /// Number of post-effects in the payload.
    post_count: u8,
    /// Number of successors (for Match8 this is 0 or 1).
    succ_count: u8,
}
151
impl<'a> Match<'a> {
    /// Parse a Match instruction from bytecode without allocating.
    ///
    /// The slice must start at the instruction and contain at least
    /// the full instruction size (determined by opcode).
    ///
    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
    #[inline]
    pub fn from_bytes(bytes: &'a [u8]) -> Self {
        debug_assert!(bytes.len() >= 8, "Match instruction too short");

        let type_id_byte = bytes[0];
        // Header byte: segment(2) | node_kind(2) | opcode(4)
        let segment = (type_id_byte >> 6) & 0x3;
        let node_kind = (type_id_byte >> 4) & 0x3;
        let opcode = Opcode::from_u8(type_id_byte & 0xF);
        debug_assert!(segment == 0, "non-zero segment not yet supported");
        debug_assert!(opcode.is_match(), "expected Match opcode");

        // Fixed 8-byte prefix: header, nav byte, node_type (u16 LE), node_field (u16 LE).
        let nav = Nav::from_byte(bytes[1]);
        let node_type_val = u16::from_le_bytes([bytes[2], bytes[3]]);
        let node_type = NodeTypeIR::from_bytes(node_kind, node_type_val);
        // A stored field id of 0 decodes to None (wildcard).
        let node_field = NonZeroU16::new(u16::from_le_bytes([bytes[4], bytes[5]]));

        let (is_match8, match8_next, pre_count, neg_count, post_count, succ_count) =
            if opcode == Opcode::Match8 {
                // Match8 stores its single successor inline; 0 means terminal.
                let next = u16::from_le_bytes([bytes[6], bytes[7]]);
                (true, next, 0, 0, 0, if next == 0 { 0 } else { 1 })
            } else {
                // Extended: counts word layout (bits 15..0):
                // pre(3) | neg(3) | post(3) | succ(6) | unused(1)
                let counts = u16::from_le_bytes([bytes[6], bytes[7]]);
                (
                    false,
                    0,
                    ((counts >> 13) & 0x7) as u8,
                    ((counts >> 10) & 0x7) as u8,
                    ((counts >> 7) & 0x7) as u8,
                    ((counts >> 1) & 0x3F) as u8,
                )
            };

        Self {
            bytes,
            segment,
            nav,
            node_type,
            node_field,
            is_match8,
            match8_next,
            pre_count,
            neg_count,
            post_count,
            succ_count,
        }
    }

    /// Check if this is a terminal (accept) state.
    #[inline]
    pub fn is_terminal(&self) -> bool {
        self.succ_count == 0
    }

    /// Check if this is an epsilon transition (no node interaction).
    #[inline]
    pub fn is_epsilon(&self) -> bool {
        self.nav == Nav::Epsilon
    }

    /// Number of successors.
    #[inline]
    pub fn succ_count(&self) -> usize {
        self.succ_count as usize
    }

    /// Get a successor by index.
    ///
    /// Debug-asserts that `idx` is in bounds and, for Match8,
    /// that the instruction is not terminal.
    #[inline]
    pub fn successor(&self, idx: usize) -> StepId {
        debug_assert!(
            idx < self.succ_count as usize,
            "successor index out of bounds"
        );
        if self.is_match8 {
            // Match8 has at most one successor, stored inline in the header.
            debug_assert!(idx == 0);
            debug_assert!(self.match8_next != 0, "terminal has no successors");
            StepId::new(self.match8_next)
        } else {
            // Extended: successors are the final payload section, 2 bytes each.
            // NOTE(review): a stored 0 here would panic in StepId::new —
            // assumes the encoder never emits zero successor slots; confirm.
            let offset = self.succ_offset() + idx * 2;
            StepId::new(u16::from_le_bytes([
                self.bytes[offset],
                self.bytes[offset + 1],
            ]))
        }
    }

    /// Iterate over pre-effects (executed before match attempt).
    #[inline]
    pub fn pre_effects(&self) -> impl Iterator<Item = EffectOp> + '_ {
        let start = 8; // payload starts at byte 8, pre-effects come first
        (0..self.pre_count as usize).map(move |i| {
            let offset = start + i * 2;
            EffectOp::from_bytes([self.bytes[offset], self.bytes[offset + 1]])
        })
    }

    /// Iterate over negated fields (must NOT be present on matched node).
    #[inline]
    pub fn neg_fields(&self) -> impl Iterator<Item = u16> + '_ {
        // Neg fields follow the pre-effects in the payload.
        let start = 8 + (self.pre_count as usize) * 2;
        (0..self.neg_count as usize).map(move |i| {
            let offset = start + i * 2;
            u16::from_le_bytes([self.bytes[offset], self.bytes[offset + 1]])
        })
    }

    /// Iterate over post-effects (executed after successful match).
    #[inline]
    pub fn post_effects(&self) -> impl Iterator<Item = EffectOp> + '_ {
        // Post-effects follow pre-effects and neg fields in the payload.
        let start = 8 + (self.pre_count as usize + self.neg_count as usize) * 2;
        (0..self.post_count as usize).map(move |i| {
            let offset = start + i * 2;
            EffectOp::from_bytes([self.bytes[offset], self.bytes[offset + 1]])
        })
    }

    /// Iterate over successors.
    #[inline]
    pub fn successors(&self) -> impl Iterator<Item = StepId> + '_ {
        (0..self.succ_count as usize).map(move |i| self.successor(i))
    }

    /// Byte offset where successors start in the payload.
    ///
    /// Payload order after the 8-byte prefix: pre-effects, neg fields,
    /// post-effects, then successors — each entry 2 bytes.
    #[inline]
    fn succ_offset(&self) -> usize {
        8 + (self.pre_count as usize + self.neg_count as usize + self.post_count as usize) * 2
    }
}
287
/// Call instruction for invoking definitions (recursion).
///
/// Fixed 8-byte encoding; see [`Call::from_bytes`] / [`Call::to_bytes`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Call {
    /// Segment index (0-3). Currently only segment 0 is supported.
    pub(crate) segment: u8,
    /// Navigation to apply before jumping to target.
    pub(crate) nav: Nav,
    /// Field constraint (None = no constraint; stored as raw 0 in bytecode).
    pub(crate) node_field: Option<NonZeroU16>,
    /// Return address (current segment).
    pub(crate) next: StepId,
    /// Callee entry point (target segment from type_id).
    pub(crate) target: StepId,
}
302
303impl Call {
304    /// Create a new Call instruction.
305    pub fn new(nav: Nav, node_field: Option<NonZeroU16>, next: StepId, target: StepId) -> Self {
306        Self {
307            segment: 0,
308            nav,
309            node_field,
310            next,
311            target,
312        }
313    }
314
315    /// Decode from 8-byte bytecode.
316    ///
317    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
318    /// For Call, node_kind bits are ignored (always 0).
319    pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
320        let type_id_byte = bytes[0];
321        let segment = (type_id_byte >> 6) & 0x3;
322        let opcode = Opcode::from_u8(type_id_byte & 0xF);
323        assert!(
324            segment == 0,
325            "non-zero segment not yet supported: {segment}"
326        );
327        assert_eq!(opcode, Opcode::Call, "expected Call opcode");
328
329        Self {
330            segment,
331            nav: Nav::from_byte(bytes[1]),
332            node_field: NonZeroU16::new(u16::from_le_bytes([bytes[2], bytes[3]])),
333            next: StepId::new(u16::from_le_bytes([bytes[4], bytes[5]])),
334            target: StepId::new(u16::from_le_bytes([bytes[6], bytes[7]])),
335        }
336    }
337
338    /// Encode to 8-byte bytecode.
339    ///
340    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
341    pub fn to_bytes(&self) -> [u8; 8] {
342        let mut bytes = [0u8; 8];
343        // node_kind = 0 for Call
344        bytes[0] = (self.segment << 6) | (Opcode::Call as u8);
345        bytes[1] = self.nav.to_byte();
346        bytes[2..4].copy_from_slice(&self.node_field.map_or(0, |v| v.get()).to_le_bytes());
347        bytes[4..6].copy_from_slice(&self.next.get().to_le_bytes());
348        bytes[6..8].copy_from_slice(&self.target.get().to_le_bytes());
349        bytes
350    }
351
352    pub fn nav(&self) -> Nav {
353        self.nav
354    }
355    pub fn node_field(&self) -> Option<NonZeroU16> {
356        self.node_field
357    }
358    pub fn next(&self) -> StepId {
359        self.next
360    }
361    pub fn target(&self) -> StepId {
362        self.target
363    }
364}
365
/// Return instruction for returning from definitions.
///
/// Fixed 8-byte encoding; only the header byte is meaningful, the
/// remaining seven bytes are reserved/padding.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Return {
    /// Segment index (0-3). Currently only segment 0 is supported.
    pub(crate) segment: u8,
}
372
373impl Return {
374    /// Create a new Return instruction.
375    pub fn new() -> Self {
376        Self { segment: 0 }
377    }
378
379    /// Decode from 8-byte bytecode.
380    ///
381    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
382    /// For Return, node_kind bits are ignored (always 0).
383    pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
384        let type_id_byte = bytes[0];
385        let segment = (type_id_byte >> 6) & 0x3;
386        let opcode = Opcode::from_u8(type_id_byte & 0xF);
387        assert!(
388            segment == 0,
389            "non-zero segment not yet supported: {segment}"
390        );
391        assert_eq!(opcode, Opcode::Return, "expected Return opcode");
392
393        Self { segment }
394    }
395
396    /// Encode to 8-byte bytecode.
397    ///
398    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
399    pub fn to_bytes(&self) -> [u8; 8] {
400        let mut bytes = [0u8; 8];
401        // node_kind = 0 for Return
402        bytes[0] = (self.segment << 6) | (Opcode::Return as u8);
403        // bytes[1..8] are reserved/padding
404        bytes
405    }
406}
407
408impl Default for Return {
409    fn default() -> Self {
410        Self::new()
411    }
412}
413
/// Trampoline instruction for universal entry.
///
/// Like Call, but the target comes from VM context (external parameter)
/// rather than being encoded in the instruction. Used at address 0 for
/// the entry preamble: `Obj → Trampoline → EndObj → Accept`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Trampoline {
    /// Segment index (0-3). Currently only segment 0 is supported.
    pub(crate) segment: u8,
    /// Return address (where to continue after entrypoint returns).
    pub(crate) next: StepId,
}
426
427impl Trampoline {
428    /// Create a new Trampoline instruction.
429    pub fn new(next: StepId) -> Self {
430        Self { segment: 0, next }
431    }
432
433    /// Decode from 8-byte bytecode.
434    ///
435    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
436    /// For Trampoline, node_kind bits are ignored (always 0).
437    pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
438        let type_id_byte = bytes[0];
439        let segment = (type_id_byte >> 6) & 0x3;
440        let opcode = Opcode::from_u8(type_id_byte & 0xF);
441        assert!(
442            segment == 0,
443            "non-zero segment not yet supported: {segment}"
444        );
445        assert_eq!(opcode, Opcode::Trampoline, "expected Trampoline opcode");
446
447        Self {
448            segment,
449            next: StepId::new(u16::from_le_bytes([bytes[2], bytes[3]])),
450        }
451    }
452
453    /// Encode to 8-byte bytecode.
454    ///
455    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
456    pub fn to_bytes(&self) -> [u8; 8] {
457        let mut bytes = [0u8; 8];
458        // node_kind = 0 for Trampoline
459        bytes[0] = (self.segment << 6) | (Opcode::Trampoline as u8);
460        // bytes[1] is padding
461        bytes[2..4].copy_from_slice(&self.next.get().to_le_bytes());
462        // bytes[4..8] are reserved/padding
463        bytes
464    }
465
466    pub fn next(&self) -> StepId {
467        self.next
468    }
469}
470
471/// Select the smallest Match variant that fits the given payload.
472pub fn select_match_opcode(slots_needed: usize) -> Option<Opcode> {
473    if slots_needed == 0 {
474        return Some(Opcode::Match8);
475    }
476    match slots_needed {
477        1..=4 => Some(Opcode::Match16),
478        5..=8 => Some(Opcode::Match24),
479        9..=12 => Some(Opcode::Match32),
480        13..=20 => Some(Opcode::Match48),
481        21..=28 => Some(Opcode::Match64),
482        _ => None, // Too large, must split
483    }
484}
485
486/// Pad a size to the next multiple of SECTION_ALIGN (64 bytes).
487#[inline]
488pub fn align_to_section(size: usize) -> usize {
489    (size + SECTION_ALIGN - 1) & !(SECTION_ALIGN - 1)
490}