
miden_processor/trace/execution_tracer.rs

use alloc::{sync::Arc, vec::Vec};

use miden_air::trace::chiplets::hasher::{
    CONTROLLER_ROWS_PER_PERM_FELT, CONTROLLER_ROWS_PER_PERMUTATION, STATE_WIDTH,
};
use miden_core::{FMP_ADDR, FMP_INIT_VALUE, operations::Operation};

use super::{
    block_stack::{BlockInfo, BlockStack, ExecutionContextInfo},
    stack::OverflowTable,
    trace_state::{
        AceReplay, AdviceReplay, BitwiseReplay, BlockAddressReplay, BlockStackReplay,
        CoreTraceFragmentContext, CoreTraceState, DecoderState, ExecutionContextReplay,
        ExecutionContextSystemInfo, ExecutionReplay, HasherRequestReplay, HasherResponseReplay,
        KernelReplay, MastForestResolutionReplay, MemoryReadsReplay, MemoryWritesReplay,
        RangeCheckerReplay, StackOverflowReplay, StackState, SystemState,
    },
    utils::split_u32_into_u16,
};
use crate::{
    ContextId, EMPTY_WORD, FastProcessor, Felt, MIN_STACK_DEPTH, ONE, RowIndex, Word, ZERO,
    continuation_stack::{Continuation, ContinuationStack},
    crypto::merkle::MerklePath,
    mast::{
        BasicBlockNode, JoinNode, LoopNode, MastForest, MastNode, MastNodeExt, MastNodeId,
        SplitNode,
    },
    processor::{Processor, StackInterface, SystemInterface},
    trace::chiplets::{CircuitEvaluation, PTR_OFFSET_ELEM, PTR_OFFSET_WORD},
    tracer::{OperationHelperRegisters, Tracer},
};

// STATE SNAPSHOT
// ================================================================================================

/// Execution state snapshot, used to record the state at the start of a trace fragment.
#[derive(Debug)]
struct StateSnapshot {
    state: CoreTraceState,
    continuation_stack: ContinuationStack,
    initial_mast_forest: Arc<MastForest>,
}

// TRACE GENERATION CONTEXT
// ================================================================================================

#[derive(Debug)]
pub struct TraceGenerationContext {
    /// The list of trace fragment contexts built during execution.
    pub core_trace_contexts: Vec<CoreTraceFragmentContext>,

    // Replays that contain additional data needed to generate the range checker and chiplets
    // columns.
    pub range_checker_replay: RangeCheckerReplay,
    pub memory_writes: MemoryWritesReplay,
    pub bitwise_replay: BitwiseReplay,
    pub hasher_for_chiplet: HasherRequestReplay,
    pub kernel_replay: KernelReplay,
    pub ace_replay: AceReplay,

    /// The number of rows per core trace fragment, except for the last fragment which may be
    /// shorter.
    pub fragment_size: usize,

    /// The maximum number of field elements allowed on the operand stack in an active execution
    /// context.
    pub max_stack_depth: usize,
}

/// Builder for recording the context to generate trace fragments during execution.
///
/// Specifically, this records the information necessary to be able to generate the trace in
/// fragments of configurable length. This requires storing state at the very beginning of the
/// fragment before any operations are executed, as well as recording the various values read during
/// execution in the corresponding "replays" (e.g. values read from memory are recorded in
/// `MemoryReadsReplay`, values read from the advice provider are recorded in `AdviceReplay`, etc.).
///
/// Then, to generate a trace fragment, we initialize the state of the processor using the stored
/// snapshot from the beginning of the fragment, and replay the recorded values as they are
/// encountered during execution (e.g. when encountering a memory read operation, we will replay the
/// value rather than querying the memory chiplet).
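///
/// # Example
///
/// An illustrative sketch of the record/replay lifecycle (hypothetical driver code; the
/// execution entry point is elided):
///
/// ```ignore
/// // Record: execute the program once, snapshotting state every `fragment_size` cycles.
/// let mut tracer = ExecutionTracer::new(1024, 4096);
/// // ... run the FastProcessor to completion, passing `&mut tracer` ...
/// let context = tracer.into_trace_generation_context();
///
/// // Replay: each fragment can now be generated independently (and in parallel), starting
/// // from its snapshot and consuming its recorded replay data.
/// for fragment in &context.core_trace_contexts {
///     // ... build up to `fragment_size` trace rows from `fragment.state` and
///     // `fragment.replay` ...
/// }
/// ```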
#[derive(Debug)]
pub struct ExecutionTracer {
    // State stored at the start of a core trace fragment.
    //
    // This field is only set to `None` at initialization, and is populated when starting a new
    // trace fragment with `Self::start_new_fragment_context()`. Hence, on the first call to
    // `Self::start_new_fragment_context()`, we don't extract a new `CoreTraceFragmentContext`,
    // but in every other call, we do.
    state_snapshot: Option<StateSnapshot>,

    // Replay data aggregated throughout the execution of a core trace fragment
    overflow_table: OverflowTable,
    overflow_replay: StackOverflowReplay,

    block_stack: BlockStack,
    block_stack_replay: BlockStackReplay,
    execution_context_replay: ExecutionContextReplay,

    hasher_chiplet_shim: HasherChipletShim,
    memory_reads: MemoryReadsReplay,
    advice: AdviceReplay,
    external: MastForestResolutionReplay,

    // Replays that contain additional data needed to generate the range checker and chiplets
    // columns.
    range_checker: RangeCheckerReplay,
    memory_writes: MemoryWritesReplay,
    bitwise: BitwiseReplay,
    kernel: KernelReplay,
    hasher_for_chiplet: HasherRequestReplay,
    ace: AceReplay,

    // Output
    fragment_contexts: Vec<CoreTraceFragmentContext>,

    /// The number of rows per core trace fragment.
    fragment_size: usize,

    /// The maximum number of field elements allowed on the operand stack in an active execution
    /// context.
    max_stack_depth: usize,

    /// Flag set in `start_clock_cycle` when a Call/Syscall/Dyncall END is encountered, consumed
    /// in `finalize_clock_cycle` to call `overflow_table.restore_context()`. This is deferred to
    /// `finalize_clock_cycle` because `finalize_clock_cycle` is only called when the operation
    /// succeeds (i.e., the stack depth check passes).
    pending_restore_context: bool,

    /// Flag set in `start_clock_cycle` when an `EvalCircuit` operation is encountered, consumed
    /// in `finalize_clock_cycle` to record the memory reads performed by the operation.
    is_eval_circuit_op: bool,
}

impl ExecutionTracer {
    /// Creates a new `ExecutionTracer` with the given fragment size and maximum stack depth.
    #[inline(always)]
    pub fn new(fragment_size: usize, max_stack_depth: usize) -> Self {
        Self {
            state_snapshot: None,
            overflow_table: OverflowTable::default(),
            overflow_replay: StackOverflowReplay::default(),
            block_stack: BlockStack::default(),
            block_stack_replay: BlockStackReplay::default(),
            execution_context_replay: ExecutionContextReplay::default(),
            hasher_chiplet_shim: HasherChipletShim::default(),
            memory_reads: MemoryReadsReplay::default(),
            range_checker: RangeCheckerReplay::default(),
            memory_writes: MemoryWritesReplay::default(),
            advice: AdviceReplay::default(),
            bitwise: BitwiseReplay::default(),
            kernel: KernelReplay::default(),
            hasher_for_chiplet: HasherRequestReplay::default(),
            ace: AceReplay::default(),
            external: MastForestResolutionReplay::default(),
            fragment_contexts: Vec::new(),
            fragment_size,
            max_stack_depth,
            pending_restore_context: false,
            is_eval_circuit_op: false,
        }
    }

    /// Converts the `ExecutionTracer` into a [TraceGenerationContext] using the data accumulated
    /// during execution.
    #[inline(always)]
    pub fn into_trace_generation_context(mut self) -> TraceGenerationContext {
        // If there is an ongoing trace state being built, finish it
        self.finish_current_fragment_context();

        TraceGenerationContext {
            core_trace_contexts: self.fragment_contexts,
            range_checker_replay: self.range_checker,
            memory_writes: self.memory_writes,
            bitwise_replay: self.bitwise,
            kernel_replay: self.kernel,
            hasher_for_chiplet: self.hasher_for_chiplet,
            ace_replay: self.ace,
            fragment_size: self.fragment_size,
            max_stack_depth: self.max_stack_depth,
        }
    }

    // HELPERS
    // -------------------------------------------------------------------------------------------

    /// Captures the internal state into a new [CoreTraceFragmentContext] (stored internally),
    /// resets the internal replay state of the builder, and records a new state snapshot, marking
    /// the beginning of the next trace fragment.
    ///
    /// This must be called at the beginning of a new trace fragment, before executing the first
    /// operation. Internal replay fields are expected to be accessed during execution of this new
    /// fragment to record data to be replayed by the trace fragment generators.
    #[inline(always)]
    fn start_new_fragment_context(
        &mut self,
        system_state: SystemState,
        stack_top: [Felt; MIN_STACK_DEPTH],
        mut continuation_stack: ContinuationStack,
        continuation: Continuation,
        current_forest: Arc<MastForest>,
    ) {
        // If there is an ongoing snapshot, finish it
        self.finish_current_fragment_context();

        // Start a new snapshot
        self.state_snapshot = {
            let decoder_state = {
                if self.block_stack.is_empty() {
                    DecoderState { current_addr: ZERO, parent_addr: ZERO }
                } else {
                    let block_info = self.block_stack.peek();

                    DecoderState {
                        current_addr: block_info.addr,
                        parent_addr: block_info.parent_addr,
                    }
                }
            };
            let stack = {
                let stack_depth =
                    MIN_STACK_DEPTH + self.overflow_table.num_elements_in_current_ctx();
                let last_overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
                StackState::new(stack_top, stack_depth, last_overflow_addr)
            };

            // Push new continuation corresponding to the current execution state
            continuation_stack.push_continuation(continuation);

            Some(StateSnapshot {
                state: CoreTraceState {
                    system: system_state,
                    decoder: decoder_state,
                    stack,
                },
                continuation_stack,
                initial_mast_forest: current_forest,
            })
        };
    }

    #[inline(always)]
    fn record_control_node_start<P: Processor>(
        &mut self,
        node: &MastNode,
        processor: &P,
        current_forest: &MastForest,
    ) {
        let ctx_info = match node {
            MastNode::Join(node) => {
                let child1_hash = current_forest
                    .get_node_by_id(node.first())
                    .expect("join node's first child expected to be in the forest")
                    .digest();
                let child2_hash = current_forest
                    .get_node_by_id(node.second())
                    .expect("join node's second child expected to be in the forest")
                    .digest();
                self.hasher_for_chiplet.record_hash_control_block(
                    child1_hash,
                    child2_hash,
                    JoinNode::DOMAIN,
                    node.digest(),
                );

                None
            },
            MastNode::Split(node) => {
                let child1_hash = current_forest
                    .get_node_by_id(node.on_true())
                    .expect("split node's true child expected to be in the forest")
                    .digest();
                let child2_hash = current_forest
                    .get_node_by_id(node.on_false())
                    .expect("split node's false child expected to be in the forest")
                    .digest();
                self.hasher_for_chiplet.record_hash_control_block(
                    child1_hash,
                    child2_hash,
                    SplitNode::DOMAIN,
                    node.digest(),
                );

                None
            },
            MastNode::Loop(node) => {
                let body_hash = current_forest
                    .get_node_by_id(node.body())
                    .expect("loop node's body expected to be in the forest")
                    .digest();

                self.hasher_for_chiplet.record_hash_control_block(
                    body_hash,
                    EMPTY_WORD,
                    LoopNode::DOMAIN,
                    node.digest(),
                );

                None
            },
            MastNode::Call(node) => {
                let callee_hash = current_forest
                    .get_node_by_id(node.callee())
                    .expect("call node's callee expected to be in the forest")
                    .digest();

                self.hasher_for_chiplet.record_hash_control_block(
                    callee_hash,
                    EMPTY_WORD,
                    node.domain(),
                    node.digest(),
                );

                let overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
                Some(ExecutionContextInfo::new(
                    processor.system().ctx(),
                    processor.system().caller_hash(),
                    processor.stack().depth(),
                    overflow_addr,
                ))
            },
            MastNode::Dyn(dyn_node) => {
                self.hasher_for_chiplet.record_hash_control_block(
                    EMPTY_WORD,
                    EMPTY_WORD,
                    dyn_node.domain(),
                    dyn_node.digest(),
                );

                if dyn_node.is_dyncall() {
                    // DYNCALL drops the top stack element (the memory address holding the
                    // callee hash) and records the stack state *after* the drop as the new
                    // context.
                    //
                    // `record_control_node_start()` is called *before* `decrement_stack_size()`,
                    // so we must compute the post-drop overflow address without actually
                    // performing the pop. We use `clk_after_pop_in_current_ctx()` which
                    // returns the clock of the second-to-last overflow entry (i.e. what
                    // `last_update_clk_in_current_ctx()` would return after the pop), or ZERO
                    // when the overflow stack has ≤1 entry and would become empty.
                    //
                    // When the stack is already at MIN_STACK_DEPTH the drop does not reduce
                    // the depth and the overflow address is ZERO, mirroring the same guard
                    // already present in the parallel-tracer path. See #2813 / PR #2904.
                    let (stack_depth_after_drop, overflow_addr) =
                        if processor.stack().depth() > MIN_STACK_DEPTH as u32 {
                            (
                                processor.stack().depth() - 1,
                                self.overflow_table.clk_after_pop_in_current_ctx(),
                            )
                        } else {
                            (processor.stack().depth(), ZERO)
                        };
                    Some(ExecutionContextInfo::new(
                        processor.system().ctx(),
                        processor.system().caller_hash(),
                        stack_depth_after_drop,
                        overflow_addr,
                    ))
                } else {
                    None
                }
            },
            MastNode::Block(_) => panic!(
                "`ExecutionTracer::record_basic_block_start()` must be called instead for basic blocks"
            ),
            MastNode::External(_) => panic!(
                "External nodes are guaranteed to be resolved before record_control_node_start is called"
            ),
        };

        let block_addr = self.hasher_chiplet_shim.record_hash_control_block();
        let parent_addr = self.block_stack.push(block_addr, ctx_info);
        self.block_stack_replay.record_node_start_parent_addr(parent_addr);
    }

    /// Records the block address for an END operation based on the block being popped.
    #[inline(always)]
    fn record_node_end(&mut self, block_info: &BlockInfo) {
        let (prev_addr, prev_parent_addr) = if self.block_stack.is_empty() {
            (ZERO, ZERO)
        } else {
            let prev_block = self.block_stack.peek();
            (prev_block.addr, prev_block.parent_addr)
        };
        self.block_stack_replay
            .record_node_end(block_info.addr, prev_addr, prev_parent_addr);
    }

    /// Records the execution context system info for CALL/SYSCALL/DYNCALL operations.
    #[inline(always)]
    fn record_execution_context(&mut self, ctx_info: ExecutionContextSystemInfo) {
        self.execution_context_replay.record_execution_context(ctx_info);
    }

    /// Records the current core trace state, if any.
    ///
    /// Specifically, extracts the stored [StateSnapshot] as well as all the replay data recorded
    /// from the various components (e.g. memory, advice, etc.) since the last call to this method.
    /// Resets the internal state to default values to prepare for the next trace fragment.
    ///
    /// Note that the very first time that this is called (at clock cycle 0), the snapshot will not
    /// contain any replay data, and so no core trace state will be recorded.
    #[inline(always)]
    fn finish_current_fragment_context(&mut self) {
        if let Some(snapshot) = self.state_snapshot.take() {
            // Extract the replays
            let (hasher_replay, block_addr_replay) = self.hasher_chiplet_shim.extract_replay();
            let memory_reads_replay = core::mem::take(&mut self.memory_reads);
            let advice_replay = core::mem::take(&mut self.advice);
            let external_replay = core::mem::take(&mut self.external);
            let stack_overflow_replay = core::mem::take(&mut self.overflow_replay);
            let block_stack_replay = core::mem::take(&mut self.block_stack_replay);
            let execution_context_replay = core::mem::take(&mut self.execution_context_replay);

            let trace_state = CoreTraceFragmentContext {
                state: snapshot.state,
                replay: ExecutionReplay {
                    hasher: hasher_replay,
                    block_address: block_addr_replay,
                    memory_reads: memory_reads_replay,
                    advice: advice_replay,
                    mast_forest_resolution: external_replay,
                    stack_overflow: stack_overflow_replay,
                    block_stack: block_stack_replay,
                    execution_context: execution_context_replay,
                },
                continuation: snapshot.continuation_stack,
                initial_mast_forest: snapshot.initial_mast_forest,
            };

            self.fragment_contexts.push(trace_state);
        }
    }

    /// Pushes the value at stack position 15 onto the overflow table. This must be called in
    /// `Tracer::start_clock_cycle()` *before* the processor increments the stack size, where stack
    /// position 15 at the start of the clock cycle corresponds to the element that overflows.
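    ///
    /// For example (illustrative): when an operation that grows the stack (e.g. `PUSH`)
    /// executes, the element at position 15 is shifted out of the top 16 and saved here
    /// together with the current clock, which becomes the overflow table's new last-update
    /// clock.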
    #[inline(always)]
    fn increment_stack_size(&mut self, processor: &FastProcessor) {
        let new_overflow_value = processor.stack_get(15);
        self.overflow_table.push(new_overflow_value, processor.system().clock());
    }

    /// Pops a value from the overflow table and records it for replay.
    #[inline(always)]
    fn decrement_stack_size(&mut self) {
        if let Some(popped_value) = self.overflow_table.pop() {
            let new_overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
            self.overflow_replay.record_pop_overflow(popped_value, new_overflow_addr);
        }
    }
}

impl Tracer for ExecutionTracer {
    type Processor = FastProcessor;

    /// Starts a new fragment context whenever the clock cycle is a multiple of `fragment_size`.
    /// Also updates the internal block stack and overflow table based on the continuation.
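    ///
    /// For example, with `fragment_size = 1024`, a new fragment context is started at clock
    /// cycles 0, 1024, 2048, and so on; the call at cycle 0 only records the initial snapshot,
    /// since there is no preceding fragment to finish.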
    #[inline(always)]
    fn start_clock_cycle(
        &mut self,
        processor: &FastProcessor,
        continuation: Continuation,
        continuation_stack: &ContinuationStack,
        current_forest: &Arc<MastForest>,
    ) {
        // check if we need to start a new trace state
        if processor.system().clock().as_usize().is_multiple_of(self.fragment_size) {
            self.start_new_fragment_context(
                SystemState::from_processor(processor),
                processor
                    .stack_top()
                    .try_into()
                    .expect("stack_top expected to be MIN_STACK_DEPTH elements"),
                continuation_stack.clone(),
                continuation.clone(),
                current_forest.clone(),
            );
        }

        match continuation {
            Continuation::ResumeBasicBlock { node_id, batch_index, op_idx_in_batch } => {
                // Update overflow table based on whether the operation increments or decrements
                // the stack size.
                let basic_block = current_forest[node_id].unwrap_basic_block();
                let op = &basic_block.op_batches()[batch_index].ops()[op_idx_in_batch];

                if op.increments_stack_size() {
                    self.increment_stack_size(processor);
                } else if op.decrements_stack_size() {
                    self.decrement_stack_size();
                }

                if matches!(op, Operation::EvalCircuit) {
                    self.is_eval_circuit_op = true;
                }
            },
            Continuation::StartNode(mast_node_id) => match &current_forest[mast_node_id] {
                MastNode::Join(_) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                },
                MastNode::Split(_) | MastNode::Loop(_) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                    self.decrement_stack_size();
                },
                MastNode::Call(_) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                    self.overflow_table.start_context();
                },
                MastNode::Dyn(dyn_node) => {
                    self.record_control_node_start(
                        &current_forest[mast_node_id],
                        processor,
                        current_forest,
                    );
                    // DYN and DYNCALL both drop the memory address from the stack.
                    self.decrement_stack_size();

                    if dyn_node.is_dyncall() {
                        // Note: the overflow pop (stack size decrement above) must happen before
                        // starting the new context so that it operates on the old context's
                        // overflow table, per the semantics of dyncall.
                        self.overflow_table.start_context();
                    }
                },
                MastNode::Block(basic_block_node) => {
                    self.hasher_for_chiplet.record_hash_basic_block(
                        current_forest.clone(),
                        mast_node_id,
                        basic_block_node.digest(),
                    );
                    let block_addr =
                        self.hasher_chiplet_shim.record_hash_basic_block(basic_block_node);
                    let parent_addr = self.block_stack.push(block_addr, None);
                    self.block_stack_replay.record_node_start_parent_addr(parent_addr);
                },
                MastNode::External(_) => unreachable!(
                    "start_clock_cycle is guaranteed not to be called on external nodes"
                ),
            },
            Continuation::Respan { node_id: _, batch_index: _ } => {
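                // A RESPAN advances to the next operation batch, so the block address moves
                // forward by one permutation's worth of hasher rows.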
                self.block_stack.peek_mut().addr += CONTROLLER_ROWS_PER_PERM_FELT;
            },
            Continuation::FinishLoop { node_id: _, was_entered }
                if was_entered && processor.stack_get(0) == ONE =>
            {
                // This is a REPEAT operation, which drops the condition (top element) off the stack
                self.decrement_stack_size();
            },
            Continuation::FinishJoin(_)
            | Continuation::FinishSplit(_)
            | Continuation::FinishCall(_)
            | Continuation::FinishDyn(_)
            | Continuation::FinishLoop { .. } // not a REPEAT, which is handled separately above
            | Continuation::FinishBasicBlock(_) => {
                // The END of a loop that was entered drops the condition from the stack.
                if matches!(
                    &continuation,
                    Continuation::FinishLoop { was_entered, .. } if *was_entered
                ) {
                    self.decrement_stack_size();
                }

                // This is an END operation; pop the block stack and record the node end
                let block_info = self.block_stack.pop();
                self.record_node_end(&block_info);

                if let Some(ctx_info) = block_info.ctx_info {
                    self.record_execution_context(ExecutionContextSystemInfo {
                        parent_ctx: ctx_info.parent_ctx,
                        parent_fn_hash: ctx_info.parent_fn_hash,
                    });

                    self.pending_restore_context = true;
                }
            },
            Continuation::FinishExternal(_)
            | Continuation::EnterForest(_)
            | Continuation::AfterExitDecorators(_) => {
                panic!(
                    "FinishExternal, EnterForest, and AfterExitDecorators continuations are guaranteed not to be passed here"
                )
            },
        }
    }

    #[inline(always)]
    fn record_mast_forest_resolution(&mut self, node_id: MastNodeId, forest: &Arc<MastForest>) {
        self.external.record_resolution(node_id, forest.clone());
    }

    #[inline(always)]
    fn record_hasher_permute(
        &mut self,
        input_state: [Felt; STATE_WIDTH],
        output_state: [Felt; STATE_WIDTH],
    ) {
        self.hasher_for_chiplet.record_permute_input(input_state);
        self.hasher_chiplet_shim.record_permute_output(output_state);
    }

    #[inline(always)]
    fn record_hasher_build_merkle_root(
        &mut self,
        node: Word,
        path: Option<&MerklePath>,
        index: Felt,
        output_root: Word,
    ) {
        let path = path.expect("execution tracer expects a valid Merkle path");
        self.hasher_chiplet_shim.record_build_merkle_root(path, output_root);
        self.hasher_for_chiplet.record_build_merkle_root(node, path.clone(), index);
    }

    #[inline(always)]
    fn record_hasher_update_merkle_root(
        &mut self,
        old_value: Word,
        new_value: Word,
        path: Option<&MerklePath>,
        index: Felt,
        old_root: Word,
        new_root: Word,
    ) {
        let path = path.expect("execution tracer expects a valid Merkle path");
        self.hasher_chiplet_shim.record_update_merkle_root(path, old_root, new_root);
        self.hasher_for_chiplet.record_update_merkle_root(
            old_value,
            new_value,
            path.clone(),
            index,
        );
    }

    #[inline(always)]
    fn record_memory_read_element(
        &mut self,
        element: Felt,
        addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_element(element, addr, ctx, clk);
    }

    #[inline(always)]
    fn record_memory_read_word(&mut self, word: Word, addr: Felt, ctx: ContextId, clk: RowIndex) {
        self.memory_reads.record_read_word(word, addr, ctx, clk);
    }

    #[inline(always)]
    fn record_memory_write_element(
        &mut self,
        element: Felt,
        addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_writes.record_write_element(element, addr, ctx, clk);
    }

    #[inline(always)]
    fn record_memory_write_word(&mut self, word: Word, addr: Felt, ctx: ContextId, clk: RowIndex) {
        self.memory_writes.record_write_word(word, addr, ctx, clk);
    }

    #[inline(always)]
    fn record_memory_read_element_pair(
        &mut self,
        element_0: Felt,
        addr_0: Felt,
        element_1: Felt,
        addr_1: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_element(element_0, addr_0, ctx, clk);
        self.memory_reads.record_read_element(element_1, addr_1, ctx, clk);
    }

    #[inline(always)]
    fn record_memory_read_dword(
        &mut self,
        words: [Word; 2],
        addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_word(words[0], addr, ctx, clk);
        self.memory_reads.record_read_word(words[1], addr + PTR_OFFSET_WORD, ctx, clk);
    }

    #[inline(always)]
    fn record_dyncall_memory(
        &mut self,
        callee_hash: Word,
        read_addr: Felt,
        read_ctx: ContextId,
        fmp_ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_word(callee_hash, read_addr, read_ctx, clk);
        self.memory_writes.record_write_element(FMP_INIT_VALUE, FMP_ADDR, fmp_ctx, clk);
    }

    #[inline(always)]
    fn record_crypto_stream(
        &mut self,
        plaintext: [Word; 2],
        src_addr: Felt,
        ciphertext: [Word; 2],
        dst_addr: Felt,
        ctx: ContextId,
        clk: RowIndex,
    ) {
        self.memory_reads.record_read_word(plaintext[0], src_addr, ctx, clk);
        self.memory_reads
            .record_read_word(plaintext[1], src_addr + PTR_OFFSET_WORD, ctx, clk);
        self.memory_writes.record_write_word(ciphertext[0], dst_addr, ctx, clk);
        self.memory_writes
            .record_write_word(ciphertext[1], dst_addr + PTR_OFFSET_WORD, ctx, clk);
    }

    #[inline(always)]
    fn record_pipe(&mut self, words: [Word; 2], addr: Felt, ctx: ContextId, clk: RowIndex) {
        self.advice.record_pop_stack_dword(words);
        self.memory_writes.record_write_word(words[0], addr, ctx, clk);
        self.memory_writes.record_write_word(words[1], addr + PTR_OFFSET_WORD, ctx, clk);
    }

    #[inline(always)]
    fn record_advice_pop_stack(&mut self, value: Felt) {
        self.advice.record_pop_stack(value);
    }

    #[inline(always)]
    fn record_advice_pop_stack_word(&mut self, word: Word) {
        self.advice.record_pop_stack_word(word);
    }

    #[inline(always)]
    fn record_u32and(&mut self, a: Felt, b: Felt) {
        self.bitwise.record_u32and(a, b);
    }

    #[inline(always)]
    fn record_u32xor(&mut self, a: Felt, b: Felt) {
        self.bitwise.record_u32xor(a, b);
    }

    #[inline(always)]
    fn record_u32_range_checks(&mut self, clk: RowIndex, u32_lo: Felt, u32_hi: Felt) {
        let (t1, t0) = split_u32_into_u16(u32_lo.as_canonical_u64());
        let (t3, t2) = split_u32_into_u16(u32_hi.as_canonical_u64());
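        // Illustrative example: u32_lo = 0x0001_0002 splits into t1 = 0x0001 (high 16 bits)
        // and t0 = 0x0002 (low 16 bits); the four u16 limbs are range-checked together below.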

        self.range_checker.record_range_check_u32(clk, [t0, t1, t2, t3]);
    }

    #[inline(always)]
    fn record_kernel_proc_access(&mut self, proc_hash: Word) {
        self.kernel.record_kernel_proc_access(proc_hash);
    }

    #[inline(always)]
    fn record_circuit_evaluation(&mut self, circuit_evaluation: CircuitEvaluation) {
        self.ace.record_circuit_evaluation(circuit_evaluation);
    }

    #[inline(always)]
    fn finalize_clock_cycle(
        &mut self,
        processor: &FastProcessor,
        _op_helper_registers: OperationHelperRegisters,
        _current_forest: &Arc<MastForest>,
    ) {
        // Restore the overflow table context for Call/Syscall/Dyncall END. This is deferred
        // from start_clock_cycle because finalize_clock_cycle is only called when the operation
        // succeeds (i.e., the stack depth check in processor.restore_context() passes).
        if self.pending_restore_context {
            // Restore context for call/syscall/dyncall: pop the current context's
            // (empty) overflow stack and restore the previous context's overflow state.
            self.overflow_table.restore_context();
            self.overflow_replay.record_restore_context_overflow_addr(
                MIN_STACK_DEPTH + self.overflow_table.num_elements_in_current_ctx(),
                self.overflow_table.last_update_clk_in_current_ctx(),
            );

            self.pending_restore_context = false;
        }

        // Record all memory reads performed during EvalCircuit operations. We run this in
        // `finalize_clock_cycle` to ensure that the memory reads are only recorded if the operation
        // succeeds (and hence the values read from the stack can be assumed to be valid).
        if self.is_eval_circuit_op {
            let ptr = processor.stack_get(0);
            let num_read = processor.stack_get(1).as_canonical_u64();
            let num_eval = processor.stack_get(2).as_canonical_u64();
            let ctx = processor.ctx();
            let clk = processor.clock();

            let num_read_rows = num_read / 2;

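            // Illustrative: with num_read = 8 and num_eval = 3, this replays 4 word reads
            // starting at `ptr`, then 3 element reads at the addresses that follow.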
            let mut addr = ptr;
            for _ in 0..num_read_rows {
                let word = processor
                    .memory()
                    .read_word(ctx, addr, clk)
                    .expect("EvalCircuit memory read should not fail after successful execution");
                self.memory_reads.record_read_word(word, addr, ctx, clk);
                addr += PTR_OFFSET_WORD;
            }
            for _ in 0..num_eval {
                let element = processor
                    .memory()
                    .read_element(ctx, addr)
                    .expect("EvalCircuit memory read should not fail after successful execution");
                self.memory_reads.record_read_element(element, addr, ctx, clk);
                addr += PTR_OFFSET_ELEM;
            }

            self.is_eval_circuit_op = false;
        }
    }
}

// HASHER CHIPLET SHIM
// ================================================================================================

/// The number of controller rows per permutation request (input + output = 2), as u32.
const NUM_HASHER_ROWS_PER_PERMUTATION: u32 = CONTROLLER_ROWS_PER_PERMUTATION as u32;

/// Implements a shim for the hasher chiplet, where the responses of the hasher chiplet are emulated
/// and recorded for later replay.
///
/// This is used to simulate hasher operations in parallel trace generation without needing to
/// actually generate the hasher trace. All hasher operations are recorded during fast execution and
/// then replayed during core trace generation.
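///
/// For example (illustrative, using the documented count of 2 controller rows per permutation):
/// starting from `addr = 1`, recording two control-block hashes followed by a one-batch basic
/// block hands out block addresses 1, 3, and 5, mirroring the row addresses the real hasher
/// chiplet would assign.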
#[derive(Debug)]
pub struct HasherChipletShim {
    /// The row address that will be assigned to the next MAST node encountered during execution.
    /// This field keeps track of the number of rows consumed in the hasher chiplet, from which
    /// the address of the next MAST node is derived.
    addr: u32,
    /// Replay recording the responses returned by the emulated hasher chiplet (the corresponding
    /// requests are recorded separately in a `HasherRequestReplay`).
    hasher_replay: HasherResponseReplay,
    block_addr_replay: BlockAddressReplay,
}

impl HasherChipletShim {
    /// Creates a new [HasherChipletShim].
    pub fn new() -> Self {
        Self {
            addr: 1,
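            // (Assumed convention: row addresses are 1-based so that an address of ZERO can
            // mean "no block", matching the decoder state used for an empty block stack.)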
            hasher_replay: HasherResponseReplay::default(),
            block_addr_replay: BlockAddressReplay::default(),
        }
    }

    /// Records the address returned from a call to `Hasher::hash_control_block()`.
    pub fn record_hash_control_block(&mut self) -> Felt {
        let block_addr = Felt::from_u32(self.addr);

        self.block_addr_replay.record_block_address(block_addr);
        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION;

        block_addr
    }

    /// Records the address returned from a call to `Hasher::hash_basic_block()`.
    pub fn record_hash_basic_block(&mut self, basic_block_node: &BasicBlockNode) -> Felt {
        let block_addr = Felt::from_u32(self.addr);

        self.block_addr_replay.record_block_address(block_addr);
        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION * basic_block_node.num_op_batches() as u32;

        block_addr
    }

    /// Records the result of a call to `Hasher::permute()`.
    pub fn record_permute_output(&mut self, hashed_state: [Felt; STATE_WIDTH]) {
        self.hasher_replay.record_permute(Felt::from_u32(self.addr), hashed_state);
        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION;
    }

    /// Records the result of a call to `Hasher::build_merkle_root()`.
    pub fn record_build_merkle_root(&mut self, path: &MerklePath, computed_root: Word) {
        self.hasher_replay
            .record_build_merkle_root(Felt::from_u32(self.addr), computed_root);
        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION * path.depth() as u32;
    }

    /// Records the result of a call to `Hasher::update_merkle_root()`.
    pub fn record_update_merkle_root(&mut self, path: &MerklePath, old_root: Word, new_root: Word) {
        self.hasher_replay
            .record_update_merkle_root(Felt::from_u32(self.addr), old_root, new_root);

        // The Merkle path is verified twice: once for the old root and once for the new root.
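        // E.g. with 2 controller rows per permutation, a depth-32 path advances `addr` by
        // 2 * 2 * 32 = 128 rows.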
        self.addr += 2 * NUM_HASHER_ROWS_PER_PERMUTATION * path.depth() as u32;
    }

    /// Extracts the recorded replays, resetting them for the next fragment (the row address
    /// counter is not reset, as hasher rows keep accumulating across fragments).
    pub fn extract_replay(&mut self) -> (HasherResponseReplay, BlockAddressReplay) {
        (
            core::mem::take(&mut self.hasher_replay),
            core::mem::take(&mut self.block_addr_replay),
        )
    }
}

impl Default for HasherChipletShim {
    fn default() -> Self {
        Self::new()
    }
}