miden_processor/fast/mod.rs
use alloc::vec::Vec;
use core::cmp::min;

use memory::Memory;
use miden_air::RowIndex;
use vm_core::{
    Decorator, DecoratorIterator, EMPTY_WORD, Felt, Kernel, ONE, Operation, Program, StackOutputs,
    WORD_SIZE, Word, ZERO,
    mast::{
        BasicBlockNode, CallNode, DynNode, ExternalNode, JoinNode, LoopNode, MastForest, MastNode,
        MastNodeId, OP_GROUP_SIZE, OpBatch, SplitNode,
    },
    stack::MIN_STACK_DEPTH,
    utils::range,
};

use crate::{
    ContextId, ExecutionError, FMP_MIN, Host, ProcessState, SYSCALL_FMP_MIN, chiplets::Ace,
    errors::ErrorContext, utils::resolve_external_node,
};

mod memory;

// Ops
mod circuit_eval;
mod crypto_ops;
mod field_ops;
mod fri_ops;
mod horner_ops;
mod io_ops;
mod stack_ops;
mod sys_ops;
mod u32_ops;

#[cfg(test)]
mod tests;

/// The size of the stack buffer.
///
/// Note: This value is much larger than it needs to be for the majority of programs. However, some
/// existing programs need it (e.g. `std::math::secp256k1::group::gen_mul`), so we're forced to push
/// it up. At this high a value, we're starting to see some performance degradation on benchmarks.
/// For example, the blake3 benchmark went from 285 MHz to 250 MHz (~10% degradation). Perhaps a
/// better solution would be to make this value much smaller (~1000), and then fall back to a `Vec`
/// if the stack overflows.
const STACK_BUFFER_SIZE: usize = 6650;

/// The initial position of the top of the stack in the stack buffer.
///
/// We place this value close to 0 because if a program hits the limit, it's much more likely to hit
/// the upper bound than the lower bound, since hitting the lower bound only occurs when you drop
/// 0's that were generated automatically to keep the stack depth at 16. In practice, if this
/// occurs, it is most likely a bug.
const INITIAL_STACK_TOP_IDX: usize = 50;

/// `WORD_SIZE`, but as a `Felt`.
const WORD_SIZE_FELT: Felt = Felt::new(4);

/// The size of a double-word.
const DOUBLE_WORD_SIZE: Felt = Felt::new(8);

/// A fast processor which doesn't generate any trace.
///
/// This processor is designed to be as fast as possible. Hence, it only keeps track of the current
/// state of the processor (i.e. the stack, current clock cycle, current memory context, and free
/// memory pointer).
///
/// # Stack Management
/// A few key points about how the stack was designed for maximum performance:
///
/// - The stack has a fixed buffer size defined by `STACK_BUFFER_SIZE`.
///   - This was observed to increase performance by at least 2x compared to using a `Vec` with
///     `push()` & `pop()`.
/// - We track the stack top and bottom using indices `stack_top_idx` and `stack_bot_idx`,
///   respectively.
/// - Since we are using a fixed-size buffer, we need to ensure that stack buffer accesses are not
///   out of bounds. Naively, we could check for this on every access. However, every operation
///   alters the stack depth by a predetermined amount, allowing us to precisely determine the
///   minimum number of operations required to reach a stack buffer boundary, whether at the top or
///   bottom (see the sketch after this list).
///   - For example, if the stack top is 10 elements away from the top boundary, and the stack
///     bottom is 15 elements away from the bottom boundary, then we can safely execute 10
///     operations that modify the stack depth with no bounds check.
/// - When switching contexts (e.g., during a call or syscall), all elements past the first 16 are
///   stored in an `ExecutionContextInfo` struct, and the stack is truncated to 16 elements. This
///   will be restored when returning from the call or syscall.
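///
/// The bounds-check bookkeeping can be summarized with the following sketch (illustrative only;
/// the actual logic lives in `update_bounds_check_counter` below):
///
/// ```text
/// // distance to the high end of the buffer (overflow side)
/// let to_top = STACK_BUFFER_SIZE - stack_top_idx;
/// // distance to the low end of the buffer (underflow side), keeping at least 16 elements
/// let to_bottom = stack_top_idx - MIN_STACK_DEPTH;
/// // number of depth-changing operations that are safe to execute without any bounds check
/// let safe_ops = min(to_top, to_bottom);
/// ```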
///
/// # Clock Cycle Management
/// - The clock cycle (`clk`) is managed in the same way as in `Process`. That is, it is incremented
///   by 1 for every row that `Process` adds to the main trace.
///   - It is important to do so because the clock cycle is used to determine the context ID for
///     new execution contexts when using `call` or `dyncall`.
/// - When executing a basic block, the clock cycle is not incremented for every individual
///   operation for performance reasons.
///   - Rather, we use `clk + operation_index` to determine the clock cycle when needed.
///   - However, this performance improvement is slightly offset by the need to parse operation
///     batches in exactly the same way as `Process` does. We will be able to recover the
///     performance loss by redesigning the `BasicBlockNode`.
#[derive(Debug)]
pub struct FastProcessor {
    /// The stack is stored in reverse order, so that the last element is at the top of the stack.
    pub(super) stack: [Felt; STACK_BUFFER_SIZE],
    /// The index of the top of the stack.
    stack_top_idx: usize,
    /// The index of the bottom of the stack.
    stack_bot_idx: usize,
    /// The counter which keeps track of the number of instructions that we can execute without
    /// hitting the bounds of `stack`.
    bounds_check_counter: usize,

    /// The current clock cycle.
    ///
    /// However, when we are in a basic block, this corresponds to the clock cycle at which the
    /// basic block was entered. Hence, given an operation, we need to add its index in the
    /// block to this value to get the clock cycle.
    pub(super) clk: RowIndex,

    /// The current context ID.
    pub(super) ctx: ContextId,

    /// The free memory pointer.
    pub(super) fmp: Felt,

    /// Whether we are currently in a syscall.
    in_syscall: bool,

    /// The hash of the function that called into the current context, or `[ZERO, ZERO, ZERO,
    /// ZERO]` if we are in the first context (i.e. when `call_stack` is empty).
    pub(super) caller_hash: Word,

    /// A map from (context_id, word_address) to the word stored starting at that memory location.
    pub(super) memory: Memory,

    /// A map storing metadata per call to the ACE chiplet.
    pub(super) ace: Ace,

    /// The call stack is used when starting a new execution context (from a `call`, `syscall` or
    /// `dyncall`) to keep track of the information needed to return to the previous context upon
    /// return. It is a stack since calls can be nested.
    call_stack: Vec<ExecutionContextInfo>,

    /// Whether to enable debug statements and tracing.
    in_debug_mode: bool,
}

impl FastProcessor {
    // CONSTRUCTORS
    // ----------------------------------------------------------------------------------------------

    /// Creates a new `FastProcessor` instance with the given stack inputs.
    ///
    /// The stack inputs are expected to be stored in reverse order. For example, if `stack_inputs =
    /// [1,2,3]`, then the stack will be initialized as `[3,2,1,0,0,...]`, with `3` being on
    /// top.
    ///
    /// # Panics
    /// - Panics if the length of `stack_inputs` is greater than `MIN_STACK_DEPTH`.
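    ///
    /// # Example
    ///
    /// A minimal sketch of the documented ordering (marked `ignore` since it is shown purely for
    /// illustration and assumes the caller has `Felt` in scope):
    ///
    /// ```ignore
    /// // stack_inputs = [1, 2, 3] initializes the stack as [3, 2, 1, 0, ...], with 3 on top.
    /// let processor = FastProcessor::new(&[Felt::new(1), Felt::new(2), Felt::new(3)]);
    /// assert_eq!(processor.stack_get(0), Felt::new(3));
    /// ```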
    pub fn new(stack_inputs: &[Felt]) -> Self {
        Self::initialize(stack_inputs, false)
    }

    /// Creates a new `FastProcessor` instance with the given stack inputs, set to debug mode.
    ///
    /// The stack inputs are expected to be stored in reverse order. For example, if `stack_inputs =
    /// [1,2,3]`, then the stack will be initialized as `[3,2,1,0,0,...]`, with `3` being on
    /// top.
    ///
    /// # Panics
    /// - Panics if the length of `stack_inputs` is greater than `MIN_STACK_DEPTH`.
    pub fn new_debug(stack_inputs: &[Felt]) -> Self {
        Self::initialize(stack_inputs, true)
    }

    fn initialize(stack_inputs: &[Felt], in_debug_mode: bool) -> Self {
        assert!(stack_inputs.len() <= MIN_STACK_DEPTH);

        let stack_top_idx = INITIAL_STACK_TOP_IDX;
        let stack = {
            let mut stack = [ZERO; STACK_BUFFER_SIZE];
            let bottom_idx = stack_top_idx - stack_inputs.len();

            stack[bottom_idx..stack_top_idx].copy_from_slice(stack_inputs);
            stack
        };

        let stack_bot_idx = stack_top_idx - MIN_STACK_DEPTH;

        let bounds_check_counter = stack_bot_idx;

        FastProcessor {
            stack,
            stack_top_idx,
            stack_bot_idx,
            bounds_check_counter,
            clk: 0_u32.into(),
            ctx: 0_u32.into(),
            fmp: Felt::new(FMP_MIN),
            in_syscall: false,
            caller_hash: EMPTY_WORD,
            memory: Memory::new(),
            call_stack: Vec::new(),
            ace: Ace::default(),
            in_debug_mode,
        }
    }

    // ACCESSORS
    // -------------------------------------------------------------------------------------------

    /// Returns the stack, such that the top of the stack is at the last index of the returned
    /// slice.
    pub fn stack(&self) -> &[Felt] {
        &self.stack[self.stack_bot_idx..self.stack_top_idx]
    }

    /// Returns the element on the stack at index `idx`.
    #[inline(always)]
    pub fn stack_get(&self, idx: usize) -> Felt {
        self.stack[self.stack_top_idx - idx - 1]
    }

    /// Mutable variant of `stack_get()`.
    #[inline(always)]
    pub fn stack_get_mut(&mut self, idx: usize) -> &mut Felt {
        &mut self.stack[self.stack_top_idx - idx - 1]
    }

    /// Returns the word on the stack starting at index `start_idx` in "stack order".
    ///
    /// That is, for `start_idx=0` the top element of the stack will be at the last position in the
    /// word.
    ///
    /// For example, if the stack looks like this:
    ///
    /// top                                                         bottom
    ///  v                                                             v
    ///  a | b | c | d | e | f | g | h | i | j | k | l | m | n | o | p
    ///
    /// Then
    /// - `stack_get_word(0)` returns `[d, c, b, a]`,
    /// - `stack_get_word(1)` returns `[e, d, c, b]`,
    /// - etc.
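    ///
    /// Equivalently, as an illustrative sketch (not the actual implementation):
    ///
    /// ```text
    /// stack_get_word(s) == [stack_get(s + 3), stack_get(s + 2), stack_get(s + 1), stack_get(s)]
    /// ```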
    #[inline(always)]
    pub fn stack_get_word(&self, start_idx: usize) -> Word {
        debug_assert!(start_idx < MIN_STACK_DEPTH);

        let word_start_idx = self.stack_top_idx - start_idx - 4;
        self.stack[range(word_start_idx, WORD_SIZE)].try_into().unwrap()
    }

    /// Returns the number of elements on the stack in the current context.
    #[inline(always)]
    pub fn stack_depth(&self) -> u32 {
        (self.stack_top_idx - self.stack_bot_idx) as u32
    }

    // MUTATORS
    // -------------------------------------------------------------------------------------------

    /// Writes an element to the stack at the given index.
    #[inline(always)]
    pub fn stack_write(&mut self, idx: usize, element: Felt) {
        self.stack[self.stack_top_idx - idx - 1] = element
    }

    /// Writes a word to the stack starting at the given index.
    ///
    /// The index is the index of the first element of the word, and the word is written in reverse
    /// order.
    #[inline(always)]
    pub fn stack_write_word(&mut self, start_idx: usize, word: &Word) {
        debug_assert!(start_idx < MIN_STACK_DEPTH);

        let word_start_idx = self.stack_top_idx - start_idx - 4;
        self.stack[range(word_start_idx, WORD_SIZE)].copy_from_slice(word)
    }

    /// Swaps the elements at the given indices on the stack.
    #[inline(always)]
    pub fn stack_swap(&mut self, idx1: usize, idx2: usize) {
        let a = self.stack_get(idx1);
        let b = self.stack_get(idx2);
        self.stack_write(idx1, b);
        self.stack_write(idx2, a);
    }

    // EXECUTE
    // -------------------------------------------------------------------------------------------

    /// Executes the given program and returns the stack outputs.
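    ///
    /// # Example
    ///
    /// A minimal sketch, assuming `program` is an assembled [`Program`] and `host` is some type
    /// implementing [`Host`] (both provided by the caller):
    ///
    /// ```ignore
    /// let processor = FastProcessor::new(&[]);
    /// let stack_outputs = processor.execute(&program, &mut host)?;
    /// ```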
    pub fn execute(
        mut self,
        program: &Program,
        host: &mut impl Host,
    ) -> Result<StackOutputs, ExecutionError> {
        self.execute_impl(program, host)
    }

    /// Executes the given program and returns the stack outputs.
    ///
    /// This function is mainly split out of `execute()` for testing purposes so that we can access
    /// the internal state of the `FastProcessor` after execution.
    fn execute_impl(
        &mut self,
        program: &Program,
        host: &mut impl Host,
    ) -> Result<StackOutputs, ExecutionError> {
        self.execute_mast_node(
            program.entrypoint(),
            program.mast_forest(),
            program.kernel(),
            host,
        )?;

        StackOutputs::new(
            self.stack[self.stack_bot_idx..self.stack_top_idx]
                .iter()
                .rev()
                .copied()
                .collect(),
        )
        .map_err(|_| {
            ExecutionError::OutputStackOverflow(
                self.stack_top_idx - self.stack_bot_idx - MIN_STACK_DEPTH,
            )
        })
    }

    // NODE EXECUTORS
    // --------------------------------------------------------------------------------------------

    fn execute_mast_node(
        &mut self,
        node_id: MastNodeId,
        program: &MastForest,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        let node = program
            .get_node_by_id(node_id)
            .ok_or(ExecutionError::MastNodeNotFoundInForest { node_id })?;

        // Note: we only run this in case there are Trace events associated with the node. However,
        // if there are assembly ops in the "before enter" list, we will cycle through them and
        // ignore them, resulting in a drop in performance. We should remove this after trace events
        // are removed from decorators - or if decorators are removed entirely.
        //
        // A similar reasoning applies to the "after exit" list.
        for &decorator_id in node.before_enter() {
            self.execute_decorator(&program[decorator_id], 0, host)?;
        }

        match node {
            MastNode::Block(basic_block_node) => {
                self.execute_basic_block_node(basic_block_node, program, host)?
            },
            MastNode::Join(join_node) => {
                self.execute_join_node(join_node, program, kernel, host)?
            },
            MastNode::Split(split_node) => {
                self.execute_split_node(split_node, program, kernel, host)?
            },
            MastNode::Loop(loop_node) => {
                self.execute_loop_node(loop_node, program, kernel, host)?
            },
            MastNode::Call(call_node) => {
                self.execute_call_node(call_node, program, kernel, host)?
            },
            MastNode::Dyn(dyn_node) => self.execute_dyn_node(dyn_node, program, kernel, host)?,
            MastNode::External(external_node) => {
                self.execute_external_node(external_node, kernel, host)?
            },
        }

        for &decorator_id in node.after_exit() {
            self.execute_decorator(&program[decorator_id], 0, host)?;
        }

        Ok(())
    }

    fn execute_join_node(
        &mut self,
        join_node: &JoinNode,
        program: &MastForest,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        // Corresponds to the row inserted for the JOIN operation added to the trace.
        self.clk += 1_u32;

        self.execute_mast_node(join_node.first(), program, kernel, host)?;
        self.execute_mast_node(join_node.second(), program, kernel, host)?;

        // Corresponds to the row inserted for the END operation added to the trace.
        self.clk += 1_u32;

        Ok(())
    }

    fn execute_split_node(
        &mut self,
        split_node: &SplitNode,
        program: &MastForest,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        // Corresponds to the row inserted for the SPLIT operation added to the trace.
        self.clk += 1_u32;

        let condition = self.stack_get(0);

        // drop the condition from the stack
        self.decrement_stack_size();

        // execute the appropriate branch
        let ret = if condition == ONE {
            self.execute_mast_node(split_node.on_true(), program, kernel, host)
        } else if condition == ZERO {
            self.execute_mast_node(split_node.on_false(), program, kernel, host)
        } else {
            Err(ExecutionError::not_binary_value_if(condition, &ErrorContext::default()))
        };

        // Corresponds to the row inserted for the END operation added to the trace.
        self.clk += 1_u32;

        ret
    }

    fn execute_loop_node(
        &mut self,
        loop_node: &LoopNode,
        program: &MastForest,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        // Corresponds to the row inserted for the LOOP operation added to the trace.
        self.clk += 1_u32;

        // The loop condition is checked after the loop body is executed.
        let mut condition = self.stack_get(0);

        // drop the condition from the stack
        self.decrement_stack_size();

        // execute the loop body as long as the condition is true
        while condition == ONE {
            self.execute_mast_node(loop_node.body(), program, kernel, host)?;

            // check the loop condition, and drop it from the stack
            condition = self.stack_get(0);
            self.decrement_stack_size();

            // this clock increment is for the row inserted for the `REPEAT` node added to
            // the trace on each iteration. It needs to be at the end of this loop (instead
            // of at the beginning), otherwise we get an off-by-one error when comparing
            // with [crate::Process].
            if condition == ONE {
                self.clk += 1_u32;
            }
        }

        // Corresponds to the row inserted for the END operation added to the trace.
        self.clk += 1_u32;

        if condition == ZERO {
            Ok(())
        } else {
            Err(ExecutionError::not_binary_value_loop(condition, &ErrorContext::default()))
        }
    }

    fn execute_call_node(
        &mut self,
        call_node: &CallNode,
        program: &MastForest,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        // Corresponds to the row inserted for the CALL or SYSCALL operation added to the trace.
        self.clk += 1_u32;

        // call or syscall are not allowed inside a syscall
        if self.in_syscall {
            let instruction = if call_node.is_syscall() { "syscall" } else { "call" };
            return Err(ExecutionError::CallInSyscall(instruction));
        }

        let callee_hash = program
            .get_node_by_id(call_node.callee())
            .ok_or(ExecutionError::MastNodeNotFoundInForest { node_id: call_node.callee() })?
            .digest();

        self.save_context_and_truncate_stack();

        if call_node.is_syscall() {
            // check if the callee is in the kernel
            if !kernel.contains_proc(callee_hash) {
                return Err(ExecutionError::syscall_target_not_in_kernel(
                    callee_hash,
                    &ErrorContext::default(),
                ));
            }

            // set the system registers to the syscall context
            self.ctx = ContextId::root();
            self.fmp = SYSCALL_FMP_MIN.into();
            self.in_syscall = true;
        } else {
            // set the system registers to the callee context
            self.ctx = self.clk.into();
            self.fmp = Felt::new(FMP_MIN);
            self.caller_hash = callee_hash.into();
        }

        // Execute the callee.
        self.execute_mast_node(call_node.callee(), program, kernel, host)?;

        // when returning from a function call or a syscall, restore the context of the
        // system registers and the operand stack to what it was prior to
        // the call.
        self.restore_context()?;

        // Corresponds to the row inserted for the END operation added to the trace.
        self.clk += 1_u32;

        Ok(())
    }

    fn execute_dyn_node(
        &mut self,
        dyn_node: &DynNode,
        program: &MastForest,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        // Corresponds to the row inserted for the DYN or DYNCALL operation added to the trace.
        self.clk += 1_u32;

        // dyn calls are not allowed inside a syscall
        if dyn_node.is_dyncall() && self.in_syscall {
            return Err(ExecutionError::CallInSyscall("dyncall"));
        }

        // Retrieve callee hash from memory, using stack top as the memory address.
        let callee_hash = {
            let mem_addr = self.stack_get(0);
            self.memory.read_word(self.ctx, mem_addr, self.clk).copied()?
        };

        // Drop the memory address from the stack. This needs to be done BEFORE saving the context,
        // because the next instruction starts with a "shifted left" stack.
        self.decrement_stack_size();

        // For dyncall, save the context and reset it.
        if dyn_node.is_dyncall() {
            self.save_context_and_truncate_stack();
            self.ctx = self.clk.into();
            self.fmp = Felt::new(FMP_MIN);
            self.caller_hash = callee_hash;
        };

        // if the callee is not in the program's MAST forest, try to find a MAST forest for it in
        // the host (corresponding to an external library loaded in the host); if none are
        // found, return an error.
        match program.find_procedure_root(callee_hash.into()) {
            Some(callee_id) => self.execute_mast_node(callee_id, program, kernel, host)?,
            None => {
                let mast_forest = host.get_mast_forest(&callee_hash.into()).ok_or_else(|| {
                    ExecutionError::dynamic_node_not_found(
                        callee_hash.into(),
                        &ErrorContext::default(),
                    )
                })?;

                // We limit the parts of the program that can be called externally to procedure
                // roots, even though MAST doesn't have that restriction.
                let root_id = mast_forest.find_procedure_root(callee_hash.into()).ok_or(
                    ExecutionError::malfored_mast_forest_in_host(
                        callee_hash.into(),
                        &ErrorContext::default(),
                    ),
                )?;

                self.execute_mast_node(root_id, &mast_forest, kernel, host)?
            },
        }

        // For dyncall, restore the context.
        if dyn_node.is_dyncall() {
            self.restore_context()?;
        }

        // Corresponds to the row inserted for the END operation added to the trace.
        self.clk += 1_u32;

        Ok(())
    }

    fn execute_external_node(
        &mut self,
        external_node: &ExternalNode,
        kernel: &Kernel,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        let (root_id, mast_forest) = resolve_external_node(external_node, host)?;

        self.execute_mast_node(root_id, &mast_forest, kernel, host)
    }

    // Note: when executing individual ops, we do not increment the clock by 1 for every operation,
    // for performance reasons (doing so was measured to cause a ~25% performance drop). Hence,
    // `self.clk` cannot be used directly to determine the number of operations executed in a
    // program.
    fn execute_basic_block_node(
        &mut self,
        basic_block_node: &BasicBlockNode,
        program: &MastForest,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        // Corresponds to the row inserted for the SPAN operation added to the trace.
        self.clk += 1_u32;

        let mut batch_offset_in_block = 0;
        let mut op_batches = basic_block_node.op_batches().iter();
        let mut decorator_ids = basic_block_node.decorator_iter();

        // execute first op batch
        if let Some(first_op_batch) = op_batches.next() {
            self.execute_op_batch(
                first_op_batch,
                &mut decorator_ids,
                batch_offset_in_block,
                program,
                host,
            )?;
            batch_offset_in_block += first_op_batch.ops().len();
        }

        // execute the rest of the op batches
        for op_batch in op_batches {
            // increment clock to account for `RESPAN`
            self.clk += 1_u32;

            self.execute_op_batch(
                op_batch,
                &mut decorator_ids,
                batch_offset_in_block,
                program,
                host,
            )?;
            batch_offset_in_block += op_batch.ops().len();
        }

        // update clock with all the operations that executed
        self.clk += batch_offset_in_block as u32;

        // Corresponds to the row inserted for the END operation added to the trace.
        self.clk += 1_u32;

        // execute any decorators which have not been executed during span ops execution; this can
        // happen for decorators appearing after all operations in a block. these decorators are
        // executed after SPAN block is closed to make sure the VM clock cycle advances beyond the
        // last clock cycle of the SPAN block ops.
        for &decorator_id in decorator_ids {
            let decorator = program
                .get_decorator_by_id(decorator_id)
                .ok_or(ExecutionError::DecoratorNotFoundInForest { decorator_id })?;
            self.execute_decorator(decorator, 0, host)?;
        }

        Ok(())
    }

    #[inline(always)]
    fn execute_op_batch(
        &mut self,
        batch: &OpBatch,
        decorators: &mut DecoratorIterator,
        batch_offset_in_block: usize,
        program: &MastForest,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        let op_counts = batch.op_counts();
        let mut op_idx_in_group = 0;
        let mut group_idx = 0;
        let mut next_group_idx = 1;

        // round up the number of groups to be processed to the next power of two; we do this
        // because the processor requires the number of groups to be either 1, 2, 4, or 8; if
        // the actual number of groups is smaller, we'll pad the batch with NOOPs at the end
        let num_batch_groups = batch.num_groups().next_power_of_two();

        // execute operations in the batch one by one
        for (op_idx_in_batch, op) in batch.ops().iter().enumerate() {
            while let Some(&decorator_id) =
                decorators.next_filtered(batch_offset_in_block + op_idx_in_batch)
            {
                let decorator = program
                    .get_decorator_by_id(decorator_id)
                    .ok_or(ExecutionError::DecoratorNotFoundInForest { decorator_id })?;
                self.execute_decorator(decorator, op_idx_in_batch, host)?;
            }

            // decode and execute the operation
            self.execute_op(op, batch_offset_in_block + op_idx_in_batch, program, host)?;

            // if the operation carries an immediate value, the value is stored at the next group
            // pointer; so, we advance the pointer to the following group
            let has_imm = op.imm_value().is_some();
            if has_imm {
                next_group_idx += 1;
            }

            // determine if we've executed all non-decorator operations in a group
            if op_idx_in_group == op_counts[group_idx] - 1 {
                // if we are at the end of the group, first check if the operation carries an
                // immediate value
                if has_imm {
                    // an operation with an immediate value cannot be the last operation in a
                    // group, so we need to execute a NOOP after it. In this processor, we
                    // increment the clock to account for the NOOP.
                    debug_assert!(op_idx_in_group < OP_GROUP_SIZE - 1, "invalid op index");
                    self.clk += 1_u32;
                }

                // then, move to the next group and reset operation index
                group_idx = next_group_idx;
                next_group_idx += 1;
                op_idx_in_group = 0;
            } else {
                op_idx_in_group += 1;
            }
        }

        // make sure we execute the required number of operation groups; this is needed when the
        // actual number of operation groups is not a power of two. In this processor, this
        // corresponds to incrementing the clock by the number of empty op groups (i.e. 1 NOOP
        // executed per missing op group).

        self.clk += (num_batch_groups - group_idx) as u32;

        Ok(())
    }

    /// Executes the specified decorator
    fn execute_decorator(
        &mut self,
        decorator: &Decorator,
        op_idx_in_batch: usize,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        match decorator {
            Decorator::Debug(options) => {
                if self.in_debug_mode {
                    host.on_debug(ProcessState::new_fast(self, op_idx_in_batch), options)?;
                }
            },
            Decorator::AsmOp(_assembly_op) => {
                // do nothing
            },
            Decorator::Trace(id) => {
                host.on_trace(ProcessState::new_fast(self, op_idx_in_batch), *id)?;
            },
        };
        Ok(())
    }

    fn execute_op(
        &mut self,
        operation: &Operation,
        op_idx: usize,
        program: &MastForest,
        host: &mut impl Host,
    ) -> Result<(), ExecutionError> {
        if self.bounds_check_counter == 0 {
            let err_str = if self.stack_top_idx - MIN_STACK_DEPTH == 0 {
                "stack underflow"
            } else {
                "stack overflow"
            };
            return Err(ExecutionError::FailedToExecuteProgram(err_str));
        }

        match operation {
            // ----- system operations ------------------------------------------------------------
            Operation::Noop => {
                // do nothing
            },
            Operation::Assert(err_code) => self.op_assert(*err_code, op_idx, host, program)?,
            Operation::FmpAdd => self.op_fmpadd(),
            Operation::FmpUpdate => self.op_fmpupdate()?,
            Operation::SDepth => self.op_sdepth(),
            Operation::Caller => self.op_caller()?,
            Operation::Clk => self.op_clk(op_idx)?,
            Operation::Emit(event_id) => self.op_emit(*event_id, op_idx, host)?,

            // ----- flow control operations ------------------------------------------------------
            // control flow operations are never executed directly
            Operation::Join => unreachable!("control flow operation"),
            Operation::Split => unreachable!("control flow operation"),
            Operation::Loop => unreachable!("control flow operation"),
            Operation::Call => unreachable!("control flow operation"),
            Operation::SysCall => unreachable!("control flow operation"),
            Operation::Dyn => unreachable!("control flow operation"),
            Operation::Dyncall => unreachable!("control flow operation"),
            Operation::Span => unreachable!("control flow operation"),
            Operation::Repeat => unreachable!("control flow operation"),
            Operation::Respan => unreachable!("control flow operation"),
            Operation::End => unreachable!("control flow operation"),
            Operation::Halt => unreachable!("control flow operation"),

            // ----- field operations -------------------------------------------------------------
            Operation::Add => self.op_add()?,
            Operation::Neg => self.op_neg()?,
            Operation::Mul => self.op_mul()?,
            Operation::Inv => self.op_inv(op_idx)?,
            Operation::Incr => self.op_incr()?,
            Operation::And => self.op_and()?,
            Operation::Or => self.op_or()?,
            Operation::Not => self.op_not()?,
            Operation::Eq => self.op_eq()?,
            Operation::Eqz => self.op_eqz()?,
            Operation::Expacc => self.op_expacc(),
            Operation::Ext2Mul => self.op_ext2mul(),

            // ----- u32 operations ---------------------------------------------------------------
            Operation::U32split => self.op_u32split()?,
            Operation::U32add => self.op_u32add()?,
            Operation::U32add3 => self.op_u32add3()?,
            Operation::U32sub => self.op_u32sub(op_idx)?,
            Operation::U32mul => self.op_u32mul()?,
            Operation::U32madd => self.op_u32madd()?,
            Operation::U32div => self.op_u32div(op_idx)?,
            Operation::U32and => self.op_u32and()?,
            Operation::U32xor => self.op_u32xor()?,
            Operation::U32assert2(err_code) => self.op_u32assert2(*err_code)?,

            // ----- stack manipulation -----------------------------------------------------------
            Operation::Pad => self.op_pad(),
            Operation::Drop => self.decrement_stack_size(),
            Operation::Dup0 => self.dup_nth(0),
            Operation::Dup1 => self.dup_nth(1),
            Operation::Dup2 => self.dup_nth(2),
            Operation::Dup3 => self.dup_nth(3),
            Operation::Dup4 => self.dup_nth(4),
            Operation::Dup5 => self.dup_nth(5),
            Operation::Dup6 => self.dup_nth(6),
            Operation::Dup7 => self.dup_nth(7),
            Operation::Dup9 => self.dup_nth(9),
            Operation::Dup11 => self.dup_nth(11),
            Operation::Dup13 => self.dup_nth(13),
            Operation::Dup15 => self.dup_nth(15),
            Operation::Swap => self.op_swap(),
            Operation::SwapW => self.swapw_nth(1),
            Operation::SwapW2 => self.swapw_nth(2),
            Operation::SwapW3 => self.swapw_nth(3),
            Operation::SwapDW => self.op_swap_double_word(),
            Operation::MovUp2 => self.rotate_left(3),
            Operation::MovUp3 => self.rotate_left(4),
            Operation::MovUp4 => self.rotate_left(5),
            Operation::MovUp5 => self.rotate_left(6),
            Operation::MovUp6 => self.rotate_left(7),
            Operation::MovUp7 => self.rotate_left(8),
            Operation::MovUp8 => self.rotate_left(9),
            Operation::MovDn2 => self.rotate_right(3),
            Operation::MovDn3 => self.rotate_right(4),
            Operation::MovDn4 => self.rotate_right(5),
            Operation::MovDn5 => self.rotate_right(6),
            Operation::MovDn6 => self.rotate_right(7),
            Operation::MovDn7 => self.rotate_right(8),
            Operation::MovDn8 => self.rotate_right(9),
            Operation::CSwap => self.op_cswap()?,
            Operation::CSwapW => self.op_cswapw()?,

            // ----- input / output ---------------------------------------------------------------
            Operation::Push(element) => self.op_push(*element),
            Operation::AdvPop => self.op_advpop(op_idx, host)?,
            Operation::AdvPopW => self.op_advpopw(op_idx, host)?,
            Operation::MLoadW => self.op_mloadw(op_idx)?,
            Operation::MStoreW => self.op_mstorew(op_idx)?,
            Operation::MLoad => self.op_mload()?,
            Operation::MStore => self.op_mstore()?,
            Operation::MStream => self.op_mstream(op_idx)?,
            Operation::Pipe => self.op_pipe(op_idx, host)?,

            // ----- cryptographic operations -----------------------------------------------------
            Operation::HPerm => self.op_hperm(),
            Operation::MpVerify(err_code) => self.op_mpverify(*err_code, host, program)?,
            Operation::MrUpdate => self.op_mrupdate(host)?,
            Operation::FriE2F4 => self.op_fri_ext2fold4()?,
            Operation::HornerBase => self.op_horner_eval_base(op_idx)?,
            Operation::HornerExt => self.op_horner_eval_ext(op_idx)?,
            Operation::ArithmeticCircuitEval => self.arithmetic_circuit_eval(op_idx)?,
        }

        Ok(())
    }

    // HELPERS
    // ----------------------------------------------------------------------------------------------

    /// Increments the stack top pointer by 1.
    ///
    /// The bottom of the stack is never affected by this operation.
    #[inline(always)]
    fn increment_stack_size(&mut self) {
        self.stack_top_idx += 1;
        self.update_bounds_check_counter();
    }

    /// Decrements the stack top pointer by 1.
    ///
    /// The bottom of the stack is only decremented in cases where the stack depth would become less
    /// than 16.
    #[inline(always)]
    fn decrement_stack_size(&mut self) {
        self.stack_top_idx -= 1;
        self.stack_bot_idx = min(self.stack_bot_idx, self.stack_top_idx - MIN_STACK_DEPTH);
        self.update_bounds_check_counter();
    }

    /// Returns the size of the stack.
    #[inline(always)]
    fn stack_size(&self) -> usize {
        self.stack_top_idx - self.stack_bot_idx
    }

    /// Updates the bounds check counter.
    ///
    /// The bounds check counter is decremented by 1. If it reaches 0, it is reset to the minimum of
    /// the distance from the stack top to the low end and to the high end of the stack buffer.
    ///
    /// The purpose of the bounds check counter is to ensure that we never access the stack buffer
    /// at an out-of-bounds index.
    #[inline(always)]
    fn update_bounds_check_counter(&mut self) {
        self.bounds_check_counter -= 1;

        if self.bounds_check_counter == 0 {
            // We will need to check the bounds either because we reach the low end or the high end
            // of the stack buffer. There are two worst cases that we are concerned about:
            // - we only execute instructions that decrease stack depth
            // - we only execute instructions that increase stack depth
            //
            // In the first case, we will hit the low end of the stack buffer; in the second case,
            // we will hit the high end of the stack buffer. We set the number of instructions that
            // is safe to execute to be the minimum of these two worst cases.

            self.bounds_check_counter =
                min(self.stack_top_idx - MIN_STACK_DEPTH, STACK_BUFFER_SIZE - self.stack_top_idx);
        }
    }

    /// Saves the current execution context and truncates the stack to 16 elements in preparation
    /// for starting a new execution context.
    fn save_context_and_truncate_stack(&mut self) {
        let overflow_stack = if self.stack_size() > MIN_STACK_DEPTH {
            // save the overflow stack, and zero out the buffer.
            //
            // Note: we need to zero the overflow buffer, since the new context expects ZERO's to be
            // pulled in if it decrements the stack size (e.g. by executing a `drop`).
            let overflow_stack =
                self.stack[self.stack_bot_idx..self.stack_top_idx - MIN_STACK_DEPTH].to_vec();
            self.stack[self.stack_bot_idx..self.stack_top_idx - MIN_STACK_DEPTH].fill(ZERO);

            overflow_stack
        } else {
            Vec::new()
        };

        self.stack_bot_idx = self.stack_top_idx - MIN_STACK_DEPTH;

        self.call_stack.push(ExecutionContextInfo {
            overflow_stack,
            ctx: self.ctx,
            fn_hash: self.caller_hash,
            fmp: self.fmp,
        });
    }

    /// Restores the execution context to the state it was in before the last `call`, `syscall` or
    /// `dyncall`.
    ///
    /// This includes restoring the overflow stack and the system parameters.
    ///
    /// # Errors
    /// - Returns an error if the overflow stack is larger than the space available in the stack
    ///   buffer.
    fn restore_context(&mut self) -> Result<(), ExecutionError> {
        // when a call/dyncall/syscall node ends, stack depth must be exactly 16.
        if self.stack_size() > MIN_STACK_DEPTH {
            return Err(ExecutionError::invalid_stack_depth_on_return(
                self.stack_size(),
                &ErrorContext::default(),
            ));
        }

        let ctx_info = self
            .call_stack
            .pop()
            .expect("execution context stack should never be empty when restoring context");

        // restore the overflow stack
        {
            let overflow_len = ctx_info.overflow_stack.len();
            if overflow_len > self.stack_bot_idx {
                return Err(ExecutionError::FailedToExecuteProgram(
                    "stack underflow when restoring context",
                ));
            }

            self.stack[range(self.stack_bot_idx - overflow_len, overflow_len)]
                .copy_from_slice(&ctx_info.overflow_stack);
            self.stack_bot_idx -= overflow_len;
        }

        // restore system parameters
        self.ctx = ctx_info.ctx;
        self.fmp = ctx_info.fmp;
        self.in_syscall = false;
        self.caller_hash = ctx_info.fn_hash;

        Ok(())
    }
}

// EXECUTION CONTEXT INFO
// ===============================================================================================

/// Information about the execution context.
///
/// This struct is used to keep track of the information needed to return to the previous context
/// upon return from a `call`, `syscall` or `dyncall`.
#[derive(Debug)]
struct ExecutionContextInfo {
    /// This stores all the elements on the stack at the call site, excluding the top 16 elements.
    /// This corresponds to the overflow table in [crate::Process].
    overflow_stack: Vec<Felt>,
    ctx: ContextId,
    fn_hash: Word,
    fmp: Felt,
}