plotnik_bytecode/bytecode/
instructions.rs

1//! Bytecode instruction definitions.
2//!
3//! Instructions are runtime-friendly structs with `from_bytes`/`to_bytes`
4//! methods for bytecode serialization.
5
6use std::num::NonZeroU16;
7
8use super::constants::{SECTION_ALIGN, STEP_SIZE};
9use super::effects::EffectOp;
10use super::nav::Nav;
11use super::node_type_ir::NodeTypeIR;
12
/// Step address in bytecode (raw u16).
///
/// Used for layout addresses, entrypoint targets, bootstrap parameter, etc.
/// Raw 0 is a valid value here (it is the entry preamble address, not a sentinel).
/// For decoded instruction successors (where 0 = terminal), use [`StepId`] instead.
pub type StepAddr = u16;
18
/// Successor step address in decoded instructions.
///
/// Uses NonZeroU16 because raw 0 means "terminal" (no successor).
/// This type is only for decoded instruction successors - use raw `u16`
/// for addresses in layout, entrypoints, and VM internals.
///
/// The `NonZeroU16` niche makes `Option<StepId>` the same size as `u16`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
#[repr(transparent)]
pub struct StepId(pub NonZeroU16);

impl StepId {
    /// Create a new StepId.
    ///
    /// # Panics
    /// Panics if `n == 0` (0 is the terminal sentinel, never a real step).
    #[inline]
    pub fn new(n: u16) -> Self {
        match NonZeroU16::new(n) {
            Some(id) => Self(id),
            None => panic!("StepId cannot be 0"),
        }
    }

    /// Get the raw u16 value.
    #[inline]
    pub fn get(self) -> u16 {
        self.0.get()
    }
}
41
/// Instruction opcodes (4-bit).
///
/// Discriminant values are the on-the-wire encoding stored in the low
/// nibble of an instruction's header byte — do not reorder.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[repr(u8)]
pub enum Opcode {
    /// 8-byte fast-path match: single inline successor, no payload.
    Match8 = 0x0,
    /// Extended match, 16 bytes (4 payload slots).
    Match16 = 0x1,
    /// Extended match, 24 bytes (8 payload slots).
    Match24 = 0x2,
    /// Extended match, 32 bytes (12 payload slots).
    Match32 = 0x3,
    /// Extended match, 48 bytes (20 payload slots).
    Match48 = 0x4,
    /// Extended match, 64 bytes (28 payload slots).
    Match64 = 0x5,
    /// Invoke a definition (8 bytes).
    Call = 0x6,
    /// Return from a definition (8 bytes).
    Return = 0x7,
    /// Universal entry: like Call but the target comes from VM context (8 bytes).
    Trampoline = 0x8,
}
56
57impl Opcode {
58    pub fn from_u8(v: u8) -> Self {
59        match v {
60            0x0 => Self::Match8,
61            0x1 => Self::Match16,
62            0x2 => Self::Match24,
63            0x3 => Self::Match32,
64            0x4 => Self::Match48,
65            0x5 => Self::Match64,
66            0x6 => Self::Call,
67            0x7 => Self::Return,
68            0x8 => Self::Trampoline,
69            _ => panic!("invalid opcode: {v}"),
70        }
71    }
72
73    /// Instruction size in bytes.
74    pub fn size(self) -> usize {
75        match self {
76            Self::Match8 => 8,
77            Self::Match16 => 16,
78            Self::Match24 => 24,
79            Self::Match32 => 32,
80            Self::Match48 => 48,
81            Self::Match64 => 64,
82            Self::Call => 8,
83            Self::Return => 8,
84            Self::Trampoline => 8,
85        }
86    }
87
88    /// Number of steps this instruction occupies.
89    pub fn step_count(self) -> u16 {
90        (self.size() / STEP_SIZE) as u16
91    }
92
93    /// Whether this is a Match variant.
94    pub fn is_match(self) -> bool {
95        matches!(
96            self,
97            Self::Match8
98                | Self::Match16
99                | Self::Match24
100                | Self::Match32
101                | Self::Match48
102                | Self::Match64
103        )
104    }
105
106    /// Whether this is an extended Match (Match16-64).
107    pub fn is_extended_match(self) -> bool {
108        matches!(
109            self,
110            Self::Match16 | Self::Match24 | Self::Match32 | Self::Match48 | Self::Match64
111        )
112    }
113
114    /// Payload capacity in u16 slots for extended Match variants.
115    pub fn payload_slots(self) -> usize {
116        match self {
117            Self::Match16 => 4,
118            Self::Match24 => 8,
119            Self::Match32 => 12,
120            Self::Match48 => 20,
121            Self::Match64 => 28,
122            _ => 0,
123        }
124    }
125}
126
/// Match instruction decoded from bytecode.
///
/// Provides iterator-based access to effects and successors without allocating.
/// Only the fixed 8-byte header is decoded eagerly; variable-length payload
/// (effects, negated fields, predicate, successors) is read lazily from `bytes`.
#[derive(Clone, Copy, Debug)]
pub struct Match<'a> {
    /// Backing bytecode slice, starting at this instruction's header byte.
    /// All lazy payload accessors index into it.
    bytes: &'a [u8],
    /// Segment index (0-3, currently only 0 is used).
    pub segment: u8,
    /// Navigation command. `Epsilon` means no cursor movement or node check.
    pub nav: Nav,
    /// Node type constraint (Any = wildcard, Named/Anonymous for specific checks).
    pub node_type: NodeTypeIR,
    /// Field constraint (None = wildcard; raw 0 in the encoding).
    pub node_field: Option<NonZeroU16>,
    /// Whether this is Match8 (no payload) or extended.
    is_match8: bool,
    /// For Match8: the single successor (0 = terminal).
    match8_next: u16,
    /// For extended: counts packed into single byte each.
    pre_count: u8,
    neg_count: u8,
    post_count: u8,
    succ_count: u8,
    /// Whether this instruction has a predicate (4-byte payload).
    has_predicate: bool,
}
153
impl<'a> Match<'a> {
    /// Parse a Match instruction from bytecode without allocating.
    ///
    /// The slice must start at the instruction and contain at least
    /// the full instruction size (determined by opcode).
    ///
    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
    ///
    /// For extended variants, the payload at byte 8 is laid out as:
    /// pre-effects, negated fields, post-effects (2 bytes each), an optional
    /// 4-byte predicate, then successors (2 bytes each) — see the count-field
    /// decoding below and [`Self::succ_offset`].
    #[inline]
    pub fn from_bytes(bytes: &'a [u8]) -> Self {
        debug_assert!(bytes.len() >= 8, "Match instruction too short");

        let type_id_byte = bytes[0];
        // Header byte: segment(2) | node_kind(2) | opcode(4)
        let segment = (type_id_byte >> 6) & 0x3;
        let node_kind = (type_id_byte >> 4) & 0x3;
        let opcode = Opcode::from_u8(type_id_byte & 0xF);
        debug_assert!(segment == 0, "non-zero segment not yet supported");
        debug_assert!(opcode.is_match(), "expected Match opcode");

        let nav = Nav::from_byte(bytes[1]);
        // Bytes 2-3: node type value; the node_kind header bits select its
        // interpretation in NodeTypeIR::from_bytes.
        let node_type_val = u16::from_le_bytes([bytes[2], bytes[3]]);
        let node_type = NodeTypeIR::from_bytes(node_kind, node_type_val);
        // Bytes 4-5: field id; raw 0 encodes "no field constraint" (None).
        let node_field = NonZeroU16::new(u16::from_le_bytes([bytes[4], bytes[5]]));

        let (is_match8, match8_next, pre_count, neg_count, post_count, succ_count, has_predicate) =
            if opcode == Opcode::Match8 {
                // Match8 packs its single successor directly into bytes 6-7;
                // raw 0 means terminal (succ_count = 0).
                let next = u16::from_le_bytes([bytes[6], bytes[7]]);
                (true, next, 0, 0, 0, if next == 0 { 0 } else { 1 }, false)
            } else {
                // counts field layout (16 bits):
                // bits 15-13: pre_count (3)
                // bits 12-10: neg_count (3)
                // bits  9-7:  post_count (3)
                // bits  6-2:  succ_count (5, max 31)
                // bit   1:    has_predicate
                // bit   0:    reserved
                let counts = u16::from_le_bytes([bytes[6], bytes[7]]);
                (
                    false,
                    0,
                    ((counts >> 13) & 0x7) as u8,
                    ((counts >> 10) & 0x7) as u8,
                    ((counts >> 7) & 0x7) as u8,
                    ((counts >> 2) & 0x1F) as u8,
                    (counts >> 1) & 0x1 != 0,
                )
            };

        Self {
            bytes,
            segment,
            nav,
            node_type,
            node_field,
            is_match8,
            match8_next,
            pre_count,
            neg_count,
            post_count,
            succ_count,
            has_predicate,
        }
    }

    /// Check if this is a terminal (accept) state, i.e. it has no successors.
    #[inline]
    pub fn is_terminal(&self) -> bool {
        self.succ_count == 0
    }

    /// Check if this is an epsilon transition (no node interaction).
    #[inline]
    pub fn is_epsilon(&self) -> bool {
        self.nav == Nav::Epsilon
    }

    /// Check if this is a Match8 (8-byte fast-path instruction).
    #[inline]
    pub fn is_match8(&self) -> bool {
        self.is_match8
    }

    /// Number of successors.
    #[inline]
    pub fn succ_count(&self) -> usize {
        self.succ_count as usize
    }

    /// Get a successor by index.
    ///
    /// Panics (via `StepId::new`) if the encoded successor is 0, which a
    /// well-formed non-terminal instruction never stores.
    #[inline]
    pub fn successor(&self, idx: usize) -> StepId {
        debug_assert!(
            idx < self.succ_count as usize,
            "successor index out of bounds"
        );
        if self.is_match8 {
            // Match8 has at most one successor, held inline in the header.
            debug_assert!(idx == 0);
            debug_assert!(self.match8_next != 0, "terminal has no successors");
            StepId::new(self.match8_next)
        } else {
            // Extended: successors are 2-byte LE entries after the effects
            // and optional predicate.
            let offset = self.succ_offset() + idx * 2;
            StepId::new(u16::from_le_bytes([
                self.bytes[offset],
                self.bytes[offset + 1],
            ]))
        }
    }

    /// Iterate over pre-effects (executed before match attempt).
    /// Stored first in the payload, 2 bytes per effect.
    #[inline]
    pub fn pre_effects(&self) -> impl Iterator<Item = EffectOp> + '_ {
        let start = 8; // payload starts at byte 8
        (0..self.pre_count as usize).map(move |i| {
            let offset = start + i * 2;
            EffectOp::from_bytes([self.bytes[offset], self.bytes[offset + 1]])
        })
    }

    /// Iterate over negated fields (must NOT be present on matched node).
    /// Stored immediately after the pre-effects, 2 bytes per field id.
    #[inline]
    pub fn neg_fields(&self) -> impl Iterator<Item = u16> + '_ {
        let start = 8 + (self.pre_count as usize) * 2;
        (0..self.neg_count as usize).map(move |i| {
            let offset = start + i * 2;
            u16::from_le_bytes([self.bytes[offset], self.bytes[offset + 1]])
        })
    }

    /// Iterate over post-effects (executed after successful match).
    /// Stored after the pre-effects and negated fields, 2 bytes per effect.
    #[inline]
    pub fn post_effects(&self) -> impl Iterator<Item = EffectOp> + '_ {
        let start = 8 + (self.pre_count as usize + self.neg_count as usize) * 2;
        (0..self.post_count as usize).map(move |i| {
            let offset = start + i * 2;
            EffectOp::from_bytes([self.bytes[offset], self.bytes[offset + 1]])
        })
    }

    /// Iterate over successors.
    #[inline]
    pub fn successors(&self) -> impl Iterator<Item = StepId> + '_ {
        (0..self.succ_count as usize).map(move |i| self.successor(i))
    }

    /// Whether this instruction has a predicate (text filter).
    #[inline]
    pub fn has_predicate(&self) -> bool {
        self.has_predicate
    }

    /// Get predicate data if present: (op, is_regex, value_ref).
    ///
    /// - `op`: operator (0=Eq, 1=Ne, 2=StartsWith, 3=EndsWith, 4=Contains, 5=RegexMatch, 6=RegexNoMatch)
    /// - `is_regex`: true if value_ref is a RegexTable index, false if StringTable index
    /// - `value_ref`: index into the appropriate table
    ///
    /// The 4-byte predicate sits after all effect entries and before the
    /// successors.
    pub fn predicate(&self) -> Option<(u8, bool, u16)> {
        if !self.has_predicate {
            return None;
        }

        let effects_size =
            (self.pre_count as usize + self.neg_count as usize + self.post_count as usize) * 2;
        let offset = 8 + effects_size;

        // First u16: low byte = operator, bit 8 = regex flag.
        let op_and_flags = u16::from_le_bytes([self.bytes[offset], self.bytes[offset + 1]]);
        let op = (op_and_flags & 0xFF) as u8;
        let is_regex = (op_and_flags >> 8) & 0x1 != 0;
        // Second u16: table index.
        let value_ref = u16::from_le_bytes([self.bytes[offset + 2], self.bytes[offset + 3]]);

        Some((op, is_regex, value_ref))
    }

    /// Byte offset where successors start in the payload.
    /// Accounts for predicate (4 bytes) if present.
    #[inline]
    fn succ_offset(&self) -> usize {
        let effects_size =
            (self.pre_count as usize + self.neg_count as usize + self.post_count as usize) * 2;
        let predicate_size = if self.has_predicate { 4 } else { 0 };
        8 + effects_size + predicate_size
    }
}
336
/// Call instruction for invoking definitions (recursion).
///
/// Encodes both the return address (`next`) and the callee entrypoint
/// (`target`); see [`Call::from_bytes`]/[`Call::to_bytes`] for the wire format.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Call {
    /// Segment index (0-3). Only 0 is currently supported by the decoder.
    pub segment: u8,
    /// Navigation to apply before jumping to target.
    pub nav: Nav,
    /// Field constraint (None = no constraint; encoded as raw 0).
    pub node_field: Option<NonZeroU16>,
    /// Return address (current segment).
    pub next: StepId,
    /// Callee entry point (target segment from type_id).
    pub target: StepId,
}
351
352impl Call {
353    /// Create a new Call instruction.
354    pub fn new(nav: Nav, node_field: Option<NonZeroU16>, next: StepId, target: StepId) -> Self {
355        Self {
356            segment: 0,
357            nav,
358            node_field,
359            next,
360            target,
361        }
362    }
363
364    /// Decode from 8-byte bytecode.
365    ///
366    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
367    /// For Call, node_kind bits are ignored (always 0).
368    pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
369        let type_id_byte = bytes[0];
370        let segment = (type_id_byte >> 6) & 0x3;
371        let opcode = Opcode::from_u8(type_id_byte & 0xF);
372        assert!(
373            segment == 0,
374            "non-zero segment not yet supported: {segment}"
375        );
376        assert_eq!(opcode, Opcode::Call, "expected Call opcode");
377
378        Self {
379            segment,
380            nav: Nav::from_byte(bytes[1]),
381            node_field: NonZeroU16::new(u16::from_le_bytes([bytes[2], bytes[3]])),
382            next: StepId::new(u16::from_le_bytes([bytes[4], bytes[5]])),
383            target: StepId::new(u16::from_le_bytes([bytes[6], bytes[7]])),
384        }
385    }
386
387    /// Encode to 8-byte bytecode.
388    ///
389    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
390    pub fn to_bytes(&self) -> [u8; 8] {
391        let mut bytes = [0u8; 8];
392        // node_kind = 0 for Call
393        bytes[0] = (self.segment << 6) | (Opcode::Call as u8);
394        bytes[1] = self.nav.to_byte();
395        bytes[2..4].copy_from_slice(&self.node_field.map_or(0, |v| v.get()).to_le_bytes());
396        bytes[4..6].copy_from_slice(&self.next.get().to_le_bytes());
397        bytes[6..8].copy_from_slice(&self.target.get().to_le_bytes());
398        bytes
399    }
400
401    pub fn nav(&self) -> Nav {
402        self.nav
403    }
404    pub fn node_field(&self) -> Option<NonZeroU16> {
405        self.node_field
406    }
407    pub fn next(&self) -> StepId {
408        self.next
409    }
410    pub fn target(&self) -> StepId {
411        self.target
412    }
413}
414
/// Return instruction for returning from definitions.
///
/// Carries no operands beyond the header byte; see [`Return::to_bytes`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Return {
    /// Segment index (0-3). Only 0 is currently supported by the decoder.
    pub segment: u8,
}
421
422impl Return {
423    /// Create a new Return instruction.
424    pub fn new() -> Self {
425        Self { segment: 0 }
426    }
427
428    /// Decode from 8-byte bytecode.
429    ///
430    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
431    /// For Return, node_kind bits are ignored (always 0).
432    pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
433        let type_id_byte = bytes[0];
434        let segment = (type_id_byte >> 6) & 0x3;
435        let opcode = Opcode::from_u8(type_id_byte & 0xF);
436        assert!(
437            segment == 0,
438            "non-zero segment not yet supported: {segment}"
439        );
440        assert_eq!(opcode, Opcode::Return, "expected Return opcode");
441
442        Self { segment }
443    }
444
445    /// Encode to 8-byte bytecode.
446    ///
447    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
448    pub fn to_bytes(&self) -> [u8; 8] {
449        let mut bytes = [0u8; 8];
450        // node_kind = 0 for Return
451        bytes[0] = (self.segment << 6) | (Opcode::Return as u8);
452        // bytes[1..8] are reserved/padding
453        bytes
454    }
455}
456
457impl Default for Return {
458    fn default() -> Self {
459        Self::new()
460    }
461}
462
/// Trampoline instruction for universal entry.
///
/// Like Call, but the target comes from VM context (external parameter)
/// rather than being encoded in the instruction. Used at address 0 for
/// the entry preamble: `Obj → Trampoline → EndObj → Accept`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Trampoline {
    /// Segment index (0-3). Only 0 is currently supported by the decoder.
    pub segment: u8,
    /// Return address (where to continue after entrypoint returns).
    pub next: StepId,
}
475
476impl Trampoline {
477    /// Create a new Trampoline instruction.
478    pub fn new(next: StepId) -> Self {
479        Self { segment: 0, next }
480    }
481
482    /// Decode from 8-byte bytecode.
483    ///
484    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
485    /// For Trampoline, node_kind bits are ignored (always 0).
486    pub(crate) fn from_bytes(bytes: [u8; 8]) -> Self {
487        let type_id_byte = bytes[0];
488        let segment = (type_id_byte >> 6) & 0x3;
489        let opcode = Opcode::from_u8(type_id_byte & 0xF);
490        assert!(
491            segment == 0,
492            "non-zero segment not yet supported: {segment}"
493        );
494        assert_eq!(opcode, Opcode::Trampoline, "expected Trampoline opcode");
495
496        Self {
497            segment,
498            next: StepId::new(u16::from_le_bytes([bytes[2], bytes[3]])),
499        }
500    }
501
502    /// Encode to 8-byte bytecode.
503    ///
504    /// Header byte layout: `segment(2) | node_kind(2) | opcode(4)`
505    pub fn to_bytes(&self) -> [u8; 8] {
506        let mut bytes = [0u8; 8];
507        // node_kind = 0 for Trampoline
508        bytes[0] = (self.segment << 6) | (Opcode::Trampoline as u8);
509        // bytes[1] is padding
510        bytes[2..4].copy_from_slice(&self.next.get().to_le_bytes());
511        // bytes[4..8] are reserved/padding
512        bytes
513    }
514
515    pub fn next(&self) -> StepId {
516        self.next
517    }
518}
519
520/// Select the smallest Match variant that fits the given payload.
521pub fn select_match_opcode(slots_needed: usize) -> Option<Opcode> {
522    if slots_needed == 0 {
523        return Some(Opcode::Match8);
524    }
525    match slots_needed {
526        1..=4 => Some(Opcode::Match16),
527        5..=8 => Some(Opcode::Match24),
528        9..=12 => Some(Opcode::Match32),
529        13..=20 => Some(Opcode::Match48),
530        21..=28 => Some(Opcode::Match64),
531        _ => None, // Too large, must split
532    }
533}
534
535/// Pad a size to the next multiple of SECTION_ALIGN (64 bytes).
536#[inline]
537pub fn align_to_section(size: usize) -> usize {
538    (size + SECTION_ALIGN - 1) & !(SECTION_ALIGN - 1)
539}