rtvm_interpreter/interpreter/
analysis.rs

1use rtvm_primitives::{eof::EofDecodeError, HashSet};
2
3use crate::{
4    instructions::utility::{read_i16, read_u16},
5    opcode,
6    primitives::{
7        bitvec::prelude::{bitvec, BitVec, Lsb0},
8        eof::TypesSection,
9        legacy::JumpTable,
10        Bytecode, Bytes, Eof, LegacyAnalyzedBytecode,
11    },
12    OPCODE_INFO_JUMPTABLE, STACK_LIMIT,
13};
14use std::{sync::Arc, vec, vec::Vec};
15
16const EOF_NON_RETURNING_FUNCTION: u8 = 0x80;
17
18/// Perform bytecode analysis.
19///
20/// The analysis finds and caches valid jump destinations for later execution as an optimization step.
21///
22/// If the bytecode is already analyzed, it is returned as-is.
23#[inline]
24pub fn to_analysed(bytecode: Bytecode) -> Bytecode {
25    let (bytes, len) = match bytecode {
26        Bytecode::LegacyRaw(bytecode) => {
27            let len = bytecode.len();
28            let mut padded_bytecode = Vec::with_capacity(len + 33);
29            padded_bytecode.extend_from_slice(&bytecode);
30            padded_bytecode.resize(len + 33, 0);
31            (Bytes::from(padded_bytecode), len)
32        }
33        n => return n,
34    };
35    let jump_table = analyze(bytes.as_ref());
36
37    Bytecode::LegacyAnalyzed(LegacyAnalyzedBytecode::new(bytes, len, jump_table))
38}
39
40/// Analyze bytecode to build a jump map.
41fn analyze(code: &[u8]) -> JumpTable {
42    let mut jumps: BitVec<u8> = bitvec![u8, Lsb0; 0; code.len()];
43
44    let range = code.as_ptr_range();
45    let start = range.start;
46    let mut iterator = start;
47    let end = range.end;
48    while iterator < end {
49        let opcode = unsafe { *iterator };
50        if opcode::JUMPDEST == opcode {
51            // SAFETY: jumps are max length of the code
52            unsafe { jumps.set_unchecked(iterator.offset_from(start) as usize, true) }
53            iterator = unsafe { iterator.offset(1) };
54        } else {
55            let push_offset = opcode.wrapping_sub(opcode::PUSH1);
56            if push_offset < 32 {
57                // SAFETY: iterator access range is checked in the while loop
58                iterator = unsafe { iterator.offset((push_offset + 2) as isize) };
59            } else {
60                // SAFETY: iterator access range is checked in the while loop
61                iterator = unsafe { iterator.offset(1) };
62            }
63        }
64    }
65
66    JumpTable(Arc::new(jumps))
67}
68
69pub fn validate_raw_eof(bytecode: Bytes) -> Result<Eof, EofError> {
70    let eof = Eof::decode(bytecode)?;
71    validate_eof(&eof)?;
72    Ok(eof)
73}
74
75/// Validate Eof structures.
76pub fn validate_eof(eof: &Eof) -> Result<(), EofError> {
77    // clone is cheap as it is Bytes and a header.
78    let mut queue = vec![eof.clone()];
79
80    while let Some(eof) = queue.pop() {
81        // iterate over types
82        validate_eof_codes(&eof)?;
83        // iterate over containers, convert them to Eof and add to analyze_eof
84        for container in eof.body.container_section {
85            queue.push(Eof::decode(container)?);
86        }
87    }
88
89    // Eof is valid
90    Ok(())
91}
92
93/// Validate EOF
94pub fn validate_eof_codes(eof: &Eof) -> Result<(), EofValidationError> {
95    let mut queued_codes = vec![false; eof.body.code_section.len()];
96    if eof.body.code_section.len() != eof.body.types_section.len() {
97        return Err(EofValidationError::InvalidTypesSection);
98    }
99
100    if eof.body.code_section.is_empty() {
101        // no code sections. This should be already checked in decode.
102        return Err(EofValidationError::NoCodeSections);
103    }
104    // first section is default one.
105    queued_codes[0] = true;
106
107    // the first code section must have a type signature
108    // (0, 0x80, max_stack_height) (0 inputs non-returning function)
109    let first_types = &eof.body.types_section[0];
110    if first_types.inputs != 0 || first_types.outputs != EOF_NON_RETURNING_FUNCTION {
111        return Err(EofValidationError::InvalidTypesSection);
112    }
113
114    // start validation from code section 0.
115    let mut queue = vec![0];
116    while let Some(index) = queue.pop() {
117        let code = &eof.body.code_section[index];
118        let accessed_codes = validate_eof_code(
119            code,
120            eof.header.data_size as usize,
121            index,
122            eof.body.container_section.len(),
123            &eof.body.types_section,
124        )?;
125
126        // queue accessed codes.
127        accessed_codes.into_iter().for_each(|i| {
128            if !queued_codes[i] {
129                queued_codes[i] = true;
130                queue.push(i);
131            }
132        });
133    }
134    // iterate over accessed codes and check if all are accessed.
135    if queued_codes.into_iter().any(|x| !x) {
136        return Err(EofValidationError::CodeSectionNotAccessed);
137    }
138
139    Ok(())
140}
141
142/// EOF Error.
143#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
144pub enum EofError {
145    Decode(EofDecodeError),
146    Validation(EofValidationError),
147}
148
149impl From<EofDecodeError> for EofError {
150    fn from(err: EofDecodeError) -> Self {
151        EofError::Decode(err)
152    }
153}
154
155impl From<EofValidationError> for EofError {
156    fn from(err: EofValidationError) -> Self {
157        EofError::Validation(err)
158    }
159}
160
161#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
162pub enum EofValidationError {
163    FalsePossitive,
164    /// Opcode is not known. It is not defined in the opcode table.
165    UnknownOpcode,
166    /// Opcode is disabled in EOF. For example JUMP, JUMPI, etc.
167    OpcodeDisabled,
168    /// Every instruction inside bytecode should be forward accessed.
169    /// Forward access can be a jump or sequential opcode.
170    /// In case after terminal opcode there should be a forward jump.
171    InstructionNotForwardAccessed,
172    /// Bytecode is too small and is missing immediate bytes for instruction.
173    MissingImmediateBytes,
174    /// Similar to [`EofValidationError::MissingImmediateBytes`] but for special case of RJUMPV immediate bytes.
175    MissingRJUMPVImmediateBytes,
176    /// Invalid jump into immediate bytes.
177    JumpToImmediateBytes,
178    /// Invalid jump into immediate bytes.
179    BackwardJumpToImmediateBytes,
180    /// MaxIndex in RJUMPV can't be zero. Zero max index makes it RJUMPI.
181    RJUMPVZeroMaxIndex,
182    /// Jump with zero offset would make a jump to next opcode, it does not make sense.
183    JumpZeroOffset,
184    /// EOFCREATE points to container out of bounds.
185    EOFCREATEInvalidIndex,
186    /// CALLF section out of bounds.
187    CodeSectionOutOfBounds,
188    /// CALLF to non returning function is not allowed.
189    CALLFNonReturningFunction,
190    /// CALLF stack overflow.
191    StackOverflow,
192    /// JUMPF needs to have enough outputs.
193    JUMPFEnoughOutputs,
194    /// JUMPF Stack
195    JUMPFStackHigherThanOutputs,
196    /// DATA load out of bounds.
197    DataLoadOutOfBounds,
198    /// RETF biggest stack num more then outputs.
199    RETFBiggestStackNumMoreThenOutputs,
200    /// Stack requirement is more than smallest stack items.
201    StackUnderflow,
202    /// Smallest stack items is more than types output.
203    TypesStackUnderflow,
204    /// Jump out of bounds.
205    JumpUnderflow,
206    /// Jump to out of bounds.
207    JumpOverflow,
208    /// Backward jump should have same smallest and biggest stack items.
209    BackwardJumpBiggestNumMismatch,
210    /// Backward jump should have same smallest and biggest stack items.
211    BackwardJumpSmallestNumMismatch,
212    /// Last instruction should be terminating.
213    LastInstructionNotTerminating,
214    /// Code section not accessed.
215    CodeSectionNotAccessed,
216    /// Types section invalid
217    InvalidTypesSection,
218    /// First types section is invalid.
219    /// It should have inputs 0 and outputs 0x80.
220    InvalidFirstTypesSection,
221    /// Max stack element mismatch.
222    MaxStackMismatch,
223    /// No code sections present
224    NoCodeSections,
225}
226
227/// Validates that:
228/// * All instructions are valid.
229/// * It ends with a terminating instruction or RJUMP.
230/// * All instructions are accessed by forward jumps or .
231///
232/// Validate stack requirements and if all codes sections are used.
233///
234/// TODO mark accessed Types/Codes
235///
236/// Preconditions:
237/// * Jump destinations are valid.
238/// * All instructions are valid and well formed.
239/// * All instruction is accessed by forward jumps.
240/// * Bytecode is valid and ends with terminating instruction.
241///
242/// Preconditions are checked in `validate_eof_bytecode`.
243pub fn validate_eof_code(
244    code: &[u8],
245    data_size: usize,
246    this_types_index: usize,
247    num_of_containers: usize,
248    types: &[TypesSection],
249) -> Result<HashSet<usize>, EofValidationError> {
250    let mut accessed_codes = HashSet::<usize>::new();
251    let this_types = &types[this_types_index];
252
253    #[derive(Debug, Copy, Clone)]
254    struct InstructionInfo {
255        /// Is immediate byte, jumps can't happen on this part of code.
256        is_immediate: bool,
257        /// Have forward jump to this opcode. Used to check if opcode
258        /// after termination is accessed.
259        is_jumpdest: bool,
260        /// Smallest number of stack items accessed by jumps or sequential opcodes.
261        smallest: i32,
262        /// Biggest number of stack items accessed by jumps or sequential opcodes.
263        biggest: i32,
264    }
265
266    impl InstructionInfo {
267        #[inline]
268        fn mark_as_immediate(&mut self) -> Result<(), EofValidationError> {
269            if self.is_jumpdest {
270                // Jump to immediate bytes.
271                return Err(EofValidationError::JumpToImmediateBytes);
272            }
273            self.is_immediate = true;
274            Ok(())
275        }
276    }
277
278    impl Default for InstructionInfo {
279        fn default() -> Self {
280            Self {
281                is_immediate: false,
282                is_jumpdest: false,
283                smallest: i32::MAX,
284                biggest: i32::MIN,
285            }
286        }
287    }
288
289    // all bytes that are intermediate.
290    let mut jumps = vec![InstructionInfo::default(); code.len()];
291    let mut is_after_termination = false;
292
293    let mut next_smallest = this_types.inputs as i32;
294    let mut next_biggest = this_types.inputs as i32;
295
296    let mut i = 0;
297    // We can check validity and jump destinations in one pass.
298    while i < code.len() {
299        let op = code[i];
300        let opcode = &OPCODE_INFO_JUMPTABLE[op as usize];
301
302        let Some(opcode) = opcode else {
303            // err unknown opcode.
304            return Err(EofValidationError::UnknownOpcode);
305        };
306
307        if !opcode.is_eof {
308            // Opcode is disabled in EOF
309            return Err(EofValidationError::OpcodeDisabled);
310        }
311
312        let this_instruction = &mut jumps[i];
313
314        // update biggest/smallest values for next instruction only if it is not after termination.
315        if !is_after_termination {
316            this_instruction.smallest = core::cmp::min(this_instruction.smallest, next_smallest);
317            this_instruction.biggest = core::cmp::max(this_instruction.biggest, next_biggest);
318        }
319
320        let this_instruction = *this_instruction;
321
322        // Opcodes after termination should be accessed by forward jumps.
323        if is_after_termination && !this_instruction.is_jumpdest {
324            // opcode after termination was not accessed.
325            return Err(EofValidationError::InstructionNotForwardAccessed);
326        }
327        is_after_termination = opcode.is_terminating_opcode;
328
329        // mark immediate as non-jumpable. RJUMPV is special case covered later.
330        if opcode.immediate_size != 0 {
331            // check if the opcode immediate are within the bounds of the code
332            if i + opcode.immediate_size as usize >= code.len() {
333                // Malfunctional code
334                return Err(EofValidationError::MissingImmediateBytes);
335            }
336
337            // mark immediate bytes as non-jumpable.
338            for imm in 1..opcode.immediate_size as usize + 1 {
339                // SAFETY: immediate size is checked above.
340                jumps[i + imm].mark_as_immediate()?;
341            }
342        }
343        // IO diff used to generate next instruction smallest/biggest value.
344        let mut stack_io_diff = opcode.io_diff() as i32;
345        // how many stack items are required for this opcode.
346        let mut stack_requirement = opcode.inputs as i32;
347        // additional immediate bytes for RJUMPV, it has dynamic vtable.
348        let mut rjumpv_additional_immediates = 0;
349        // If opcodes is RJUMP, RJUMPI or RJUMPV then this will have absolute jumpdest.
350        let mut absolute_jumpdest = vec![];
351        match op {
352            opcode::RJUMP | opcode::RJUMPI => {
353                let offset = unsafe { read_i16(code.as_ptr().add(i + 1)) } as isize;
354                absolute_jumpdest = vec![offset + 3 + i as isize];
355                // RJUMP is considered a terminating opcode.
356            }
357            opcode::RJUMPV => {
358                // code length for RJUMPV is checked with immediate size.
359                let max_index = code[i + 1] as usize;
360                let len = max_index + 1;
361                // and max_index+1 is to get size of vtable as index starts from 0.
362                rjumpv_additional_immediates = len * 2;
363
364                // +1 is for max_index byte
365                if i + 1 + rjumpv_additional_immediates >= code.len() {
366                    // Malfunctional code RJUMPV vtable is not complete
367                    return Err(EofValidationError::MissingRJUMPVImmediateBytes);
368                }
369
370                // Mark vtable as immediate, max_index was already marked.
371                for imm in 0..rjumpv_additional_immediates {
372                    // SAFETY: immediate size is checked above.
373                    jumps[i + 2 + imm].mark_as_immediate()?;
374                }
375
376                let mut jumps = Vec::with_capacity(len);
377                for vtablei in 0..len {
378                    let offset =
379                        unsafe { read_i16(code.as_ptr().add(i + 2 + 2 * vtablei)) } as isize;
380                    jumps.push(offset + i as isize + 2 + rjumpv_additional_immediates as isize);
381                }
382                absolute_jumpdest = jumps
383            }
384            opcode::CALLF => {
385                let section_i = unsafe { read_u16(code.as_ptr().add(i + 1)) } as usize;
386                let Some(target_types) = types.get(section_i) else {
387                    // code section out of bounds.
388                    return Err(EofValidationError::CodeSectionOutOfBounds);
389                };
390
391                if target_types.outputs == EOF_NON_RETURNING_FUNCTION {
392                    // callf to non returning function is not allowed
393                    return Err(EofValidationError::CALLFNonReturningFunction);
394                }
395                // stack input for this opcode is the input of the called code.
396                stack_requirement = target_types.inputs as i32;
397                // stack diff depends on input/output of the called code.
398                stack_io_diff = target_types.io_diff();
399                // mark called code as accessed.
400                accessed_codes.insert(section_i);
401
402                // we decrement by `types.inputs` as they are considered as send
403                // to the called code and included in types.max_stack_size.
404                if this_instruction.biggest - stack_requirement + target_types.max_stack_size as i32
405                    > STACK_LIMIT as i32
406                {
407                    // if stack max items + called code max stack size
408                    return Err(EofValidationError::StackOverflow);
409                }
410            }
411            opcode::JUMPF => {
412                let target_index = unsafe { read_u16(code.as_ptr().add(i + 1)) } as usize;
413                // targeted code needs to have zero outputs (be non returning).
414                let Some(target_types) = types.get(target_index) else {
415                    // code section out of bounds.
416                    return Err(EofValidationError::CodeSectionOutOfBounds);
417                };
418
419                // we decrement types.inputs as they are considered send to the called code.
420                // and included in types.max_stack_size.
421                if this_instruction.biggest - target_types.inputs as i32
422                    + target_types.max_stack_size as i32
423                    > STACK_LIMIT as i32
424                {
425                    // stack overflow
426                    return Err(EofValidationError::StackOverflow);
427                }
428                accessed_codes.insert(target_index);
429
430                if target_types.outputs == EOF_NON_RETURNING_FUNCTION {
431                    // if it is not returning
432                    stack_requirement = target_types.inputs as i32;
433                } else {
434                    // check if target code produces enough outputs.
435                    if this_types.outputs < target_types.outputs {
436                        return Err(EofValidationError::JUMPFEnoughOutputs);
437                    }
438
439                    stack_requirement = this_types.outputs as i32 + target_types.inputs as i32
440                        - target_types.outputs as i32;
441
442                    // Stack requirement needs to more than this instruction biggest stack number.
443                    if this_instruction.biggest > stack_requirement {
444                        return Err(EofValidationError::JUMPFStackHigherThanOutputs);
445                    }
446
447                    // if this instruction max + target_types max is more then stack limit.
448                    if this_instruction.biggest + stack_requirement > STACK_LIMIT as i32 {
449                        return Err(EofValidationError::StackOverflow);
450                    }
451                }
452            }
453            opcode::EOFCREATE => {
454                let index = code[i + 1] as usize;
455                if index >= num_of_containers {
456                    // code section out of bounds.
457                    return Err(EofValidationError::EOFCREATEInvalidIndex);
458                }
459            }
460            opcode::DATALOADN => {
461                let index = unsafe { read_u16(code.as_ptr().add(i + 1)) } as isize;
462                if data_size < 32 || index > data_size as isize - 32 {
463                    // data load out of bounds.
464                    return Err(EofValidationError::DataLoadOutOfBounds);
465                }
466            }
467            opcode::RETF => {
468                stack_requirement = this_types.outputs as i32;
469                if this_instruction.biggest > stack_requirement {
470                    return Err(EofValidationError::RETFBiggestStackNumMoreThenOutputs);
471                }
472            }
473            opcode::DUPN => {
474                stack_requirement = code[i + 1] as i32 + 1;
475            }
476            opcode::SWAPN => {
477                stack_requirement = code[i + 1] as i32 + 2;
478            }
479            opcode::EXCHANGE => {
480                let imm = code[i + 1];
481                let n = (imm >> 4) + 1;
482                let m = (imm & 0x0F) + 1;
483                stack_requirement = n as i32 + m as i32 + 1;
484            }
485            _ => {}
486        }
487        // check if stack requirement is more than smallest stack items.
488        if stack_requirement > this_instruction.smallest {
489            // opcode requirement is more than smallest stack items.
490            return Err(EofValidationError::StackUnderflow);
491        }
492
493        next_smallest = this_instruction.smallest + stack_io_diff;
494        next_biggest = this_instruction.biggest + stack_io_diff;
495
496        // check if jumpdest are correct and mark forward jumps.
497        for absolute_jump in absolute_jumpdest {
498            if absolute_jump < 0 {
499                // jump out of bounds.
500                return Err(EofValidationError::JumpUnderflow);
501            }
502            if absolute_jump >= code.len() as isize {
503                // jump to out of bounds
504                return Err(EofValidationError::JumpOverflow);
505            }
506            // fine to cast as bounds are checked.
507            let absolute_jump = absolute_jump as usize;
508
509            let target_jump = &mut jumps[absolute_jump];
510            if target_jump.is_immediate {
511                // Jump target is immediate byte.
512                return Err(EofValidationError::BackwardJumpToImmediateBytes);
513            }
514
515            // needed to mark forward jumps. It does not do anything for backward jumps.
516            target_jump.is_jumpdest = true;
517
518            if absolute_jump <= i {
519                // backward jumps should have same smallest and biggest stack items.
520                if target_jump.biggest != next_biggest {
521                    // wrong jumpdest.
522                    return Err(EofValidationError::BackwardJumpBiggestNumMismatch);
523                }
524                if target_jump.smallest != next_smallest {
525                    // wrong jumpdest.
526                    return Err(EofValidationError::BackwardJumpSmallestNumMismatch);
527                }
528            } else {
529                // forward jumps can make min even smallest size
530                // while biggest num is needed to check stack overflow
531                target_jump.smallest = core::cmp::min(target_jump.smallest, next_smallest);
532                target_jump.biggest = core::cmp::max(target_jump.biggest, next_biggest);
533            }
534        }
535
536        // additional immediate are from RJUMPV vtable.
537        i += 1 + opcode.immediate_size as usize + rjumpv_additional_immediates;
538    }
539
540    // last opcode should be terminating
541    if !is_after_termination {
542        // wrong termination.
543        return Err(EofValidationError::LastInstructionNotTerminating);
544    }
545    // TODO integrate max so we dont need to iterate again
546    let mut max_stack_requirement = 0;
547    for opcode in jumps {
548        max_stack_requirement = core::cmp::max(opcode.biggest, max_stack_requirement);
549    }
550
551    if max_stack_requirement != types[this_types_index].max_stack_size as i32 {
552        // stack overflow
553        return Err(EofValidationError::MaxStackMismatch);
554    }
555
556    Ok(accessed_codes)
557}
558
559#[cfg(test)]
560mod test {
561    use super::*;
562    use rtvm_primitives::hex;
563
564    #[test]
565    fn test1() {
566        // result:Result { result: false, exception: Some("EOF_ConflictingStackHeight") }
567        let err =
568            validate_raw_eof(hex!("ef0001010004020001000704000000008000016000e200fffc00").into());
569        assert!(err.is_err(), "{err:#?}");
570    }
571
572    #[test]
573    fn test2() {
574        // result:Result { result: false, exception: Some("EOF_InvalidNumberOfOutputs") }
575        let err =
576            validate_raw_eof(hex!("ef000101000c02000300040004000204000000008000020002000100010001e30001005fe500025fe4").into());
577        assert!(err.is_ok(), "{err:#?}");
578    }
579
580    #[test]
581    fn test3() {
582        // result:Result { result: false, exception: Some("EOF_InvalidNumberOfOutputs") }
583        let err =
584            validate_raw_eof(hex!("ef000101000c02000300040008000304000000008000020002000503010003e30001005f5f5f5f5fe500025050e4").into());
585        assert_eq!(
586            err,
587            Err(EofError::Validation(
588                EofValidationError::JUMPFStackHigherThanOutputs
589            ))
590        );
591    }
592}