Skip to main content

miden_processor/trace/
execution_tracer.rs

1use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
2
3use miden_air::trace::chiplets::hasher::{
4    CONTROLLER_ROWS_PER_PERM_FELT, CONTROLLER_ROWS_PER_PERMUTATION, STATE_WIDTH,
5};
6use miden_core::{FMP_ADDR, FMP_INIT_VALUE, operations::Operation};
7
8use super::{
9    block_stack::{BlockInfo, BlockStack, ExecutionContextInfo},
10    stack::OverflowTable,
11    trace_state::{
12        AceReplay, AdviceReplay, BitwiseReplay, BlockAddressReplay, BlockStackReplay,
13        CoreTraceFragmentContext, CoreTraceState, DecoderState, ExecutionContextReplay,
14        ExecutionContextSystemInfo, ExecutionReplay, HasherRequestReplay, HasherResponseReplay,
15        KernelReplay, MastForestResolutionReplay, MemoryReadsReplay, MemoryWritesReplay,
16        RangeCheckerReplay, StackOverflowReplay, StackState, SystemState,
17    },
18    utils::split_u32_into_u16,
19};
20use crate::{
21    ContextId, EMPTY_WORD, FastProcessor, Felt, MIN_STACK_DEPTH, ONE, RowIndex, Word, ZERO,
22    continuation_stack::{Continuation, ContinuationStack},
23    crypto::merkle::MerklePath,
24    mast::{
25        BasicBlockNode, JoinNode, LoopNode, MastForest, MastForestId, MastNode, MastNodeExt,
26        MastNodeId, SparseMastForest, SparseMastForestBuilder, SplitNode, VisitKind,
27    },
28    processor::{Processor, StackInterface, SystemInterface},
29    trace::chiplets::{CircuitEvaluation, PTR_OFFSET_ELEM, PTR_OFFSET_WORD},
30    tracer::{OperationHelperRegisters, Tracer},
31    utils::Idx,
32};
33
34// STATE SNAPSHOT
35// ================================================================================================
36
37/// Execution state snapshot, used to record the state at the start of a trace fragment.
38#[derive(Debug)]
39struct StateSnapshot {
40    state: CoreTraceState,
41    /// Continuation stack with forest references already translated to [`MastForestId`]s into the
42    /// `mast_forest_store` of the [`TraceGenerationContext`] being built.
43    continuation_stack: ContinuationStack<MastForestId>,
44    /// The active forest at the start of this fragment, in `mast_forest_store`.
45    initial_mast_forest_id: MastForestId,
46}
47
48// TRACE GENERATION CONTEXT
49// ================================================================================================
50
51#[derive(Debug)]
52pub struct TraceGenerationContext {
53    /// The list of trace fragment contexts built during execution.
54    pub core_trace_contexts: Vec<CoreTraceFragmentContext>,
55
56    /// Sparse MAST forests, one per source [`MastForest`] visited during execution.
57    ///
58    /// Each entry contains only the [`MastNode`]s that were actually visited, while preserving the
59    /// original [`MastNodeId`]s of the source forest. References from `CoreTraceFragmentContext`,
60    /// `MastForestResolutionReplay`, and `HasherOp::HashBasicBlock` are encoded as
61    /// [`MastForestId`]s into this vector.
62    pub mast_forest_store: Vec<Arc<SparseMastForest>>,
63
64    // Replays that contain additional data needed to generate the range checker and chiplets
65    // columns.
66    pub range_checker_replay: RangeCheckerReplay,
67    pub memory_writes: MemoryWritesReplay,
68    pub bitwise_replay: BitwiseReplay,
69    pub hasher_for_chiplet: HasherRequestReplay,
70    pub kernel_replay: KernelReplay,
71    pub ace_replay: AceReplay,
72
73    /// The number of rows per core trace fragment, except for the last fragment which may be
74    /// shorter.
75    pub fragment_size: usize,
76
77    /// The maximum number of field elements allowed on the operand stack in an active execution
78    /// context.
79    pub max_stack_depth: usize,
80}
81
82/// Builder for recording the context to generate trace fragments during execution.
83///
84/// Specifically, this records the information necessary to be able to generate the trace in
85/// fragments of configurable length. This requires storing state at the very beginning of the
86/// fragment before any operations are executed, as well as recording the various values read during
87/// execution in the corresponding "replays" (e.g. values read from memory are recorded in
88/// `MemoryReadsReplay`, values read from the advice provider are recorded in `AdviceReplay``, etc).
89///
90/// Then, to generate a trace fragment, we initialize the state of the processor using the stored
91/// snapshot from the beginning of the fragment, and replay the recorded values as they are
92/// encountered during execution (e.g. when encountering a memory read operation, we will replay the
93/// value rather than querying the memory chiplet).
94#[derive(Debug)]
95pub struct ExecutionTracer {
96    // State stored at the start of a core trace fragment.
97    //
98    // This field is only set to `None` at initialization, and is populated when starting a new
99    // trace fragment with `Self::start_new_fragment_context()`. Hence, on the first call to
100    // `Self::start_new_fragment_context()`, we don't extract a new `TraceFragmentContext`, but in
101    // every other call, we do.
102    state_snapshot: Option<StateSnapshot>,
103
104    // Replay data aggregated throughout the execution of a core trace fragment
105    overflow_table: OverflowTable,
106    overflow_replay: StackOverflowReplay,
107
108    block_stack: BlockStack,
109    block_stack_replay: BlockStackReplay,
110    execution_context_replay: ExecutionContextReplay,
111
112    hasher_chiplet_shim: HasherChipletShim,
113    memory_reads: MemoryReadsReplay,
114    advice: AdviceReplay,
115    external: MastForestResolutionReplay,
116
117    // Replays that contain additional data needed to generate the range checker and chiplets
118    // columns.
119    range_checker: RangeCheckerReplay,
120    memory_writes: MemoryWritesReplay,
121    bitwise: BitwiseReplay,
122    kernel: KernelReplay,
123    hasher_for_chiplet: HasherRequestReplay,
124    ace: AceReplay,
125
126    // Output
127    fragment_contexts: Vec<CoreTraceFragmentContext>,
128
129    /// Per-source-forest sparse builders, indexed by `mast_forest_indices`.
130    ///
131    /// Each builder accumulates the [`MastNodeId`]s of nodes visited inside its source forest
132    /// during execution, and is finalized into an [`Arc<SparseMastForest>`] in
133    /// [`Self::into_trace_generation_context`].
134    mast_forest_builders: Vec<SparseMastForestBuilder>,
135
136    /// Maps a source forest's `Arc::as_ptr` identity to its [`MastForestId`] in
137    /// `mast_forest_builders` (and, by construction, the eventual id in
138    /// `TraceGenerationContext::mast_forest_store`).
139    ///
140    /// Pointer identity is sound here because the trace-generation flow never crosses a process
141    /// boundary: the tracer builds the store and the replay processor consumes it in the same
142    /// run.
143    mast_forest_ids: BTreeMap<*const MastForest, MastForestId>,
144
145    /// The number of rows per core trace fragment.
146    fragment_size: usize,
147
148    /// The maximum number of field elements allowed on the operand stack in an active execution
149    /// context.
150    max_stack_depth: usize,
151
152    /// Flag set in `start_clock_cycle` when a Call/Syscall/Dyncall END is encountered, consumed
153    /// in `finalize_clock_cycle` to call `overflow_table.restore_context()`. This is deferred to
154    /// `finalize_clock_cycle` because `finalize_clock_cycle` is only called when the operation
155    /// succeeds (i.e., the stack depth check passes).
156    pending_restore_context: bool,
157
158    /// Flag set in `start_clock_cycle` when an `EvalCircuit` operation is encountered, consumed
159    /// in `finalize_clock_cycle` to record the memory reads performed by the operation.
160    is_eval_circuit_op: bool,
161}
162
163impl ExecutionTracer {
164    /// Creates a new `ExecutionTracer` with the given fragment size.
165    #[inline(always)]
166    pub fn new(fragment_size: usize, max_stack_depth: usize) -> Self {
167        Self {
168            state_snapshot: None,
169            overflow_table: OverflowTable::default(),
170            overflow_replay: StackOverflowReplay::default(),
171            block_stack: BlockStack::default(),
172            block_stack_replay: BlockStackReplay::default(),
173            execution_context_replay: ExecutionContextReplay::default(),
174            hasher_chiplet_shim: HasherChipletShim::default(),
175            memory_reads: MemoryReadsReplay::default(),
176            range_checker: RangeCheckerReplay::default(),
177            memory_writes: MemoryWritesReplay::default(),
178            advice: AdviceReplay::default(),
179            bitwise: BitwiseReplay::default(),
180            kernel: KernelReplay::default(),
181            hasher_for_chiplet: HasherRequestReplay::default(),
182            ace: AceReplay::default(),
183            external: MastForestResolutionReplay::default(),
184            fragment_contexts: Vec::new(),
185            mast_forest_builders: Vec::new(),
186            mast_forest_ids: BTreeMap::new(),
187            fragment_size,
188            max_stack_depth,
189            pending_restore_context: false,
190            is_eval_circuit_op: false,
191        }
192    }
193
194    /// Returns the [`MastForestId`] of `forest` in [`Self::mast_forest_builders`], creating a new
195    /// builder for it on first encounter. Forests are identified by `Arc::as_ptr`.
196    #[inline]
197    fn forest_id(&mut self, forest: &Arc<MastForest>) -> MastForestId {
198        let key = Arc::as_ptr(forest);
199        if let Some(&id) = self.mast_forest_ids.get(&key) {
200            return id;
201        }
202
203        let id = MastForestId::from(self.mast_forest_builders.len() as u32);
204        self.mast_forest_builders.push(SparseMastForestBuilder::new(forest.clone()));
205        self.mast_forest_ids.insert(key, id);
206        id
207    }
208
209    /// Records that the node with id `node_id` was visited inside `forest`. Also records the
210    /// node's immediate children (if any) as digest-only references.
211    ///
212    /// The parent is recorded as a [`VisitKind::FullVisit`] so its full [`MastNode`] is available
213    /// at replay time. Children are recorded as [`VisitKind::DigestOnly`] because trace
214    /// generation only needs their digest when building the parent's trace row (e.g. a Split
215    /// node needs the digest of *both* branches even if only one is taken; a Join needs both
216    /// children's digests; a Call needs the callee's digest). If a child is later actually
217    /// entered, a subsequent `record_visit` call promotes it to a full visit.
218    ///
219    /// Keeping un-entered children as digest-only means accidental entry into a pruned node
220    /// surfaces as a clean `get_node_by_id` miss rather than a partially-populated node.
221    #[inline]
222    fn record_visit(&mut self, forest: &Arc<MastForest>, node_id: MastNodeId) {
223        let id = self.forest_id(forest);
224        self.mast_forest_builders[id.to_usize()].record_visit(node_id, VisitKind::FullVisit);
225
226        if let Some(node) = forest.get_node_by_id(node_id) {
227            node.for_each_child(|child_id| {
228                self.mast_forest_builders[id.to_usize()]
229                    .record_visit(child_id, VisitKind::DigestOnly);
230            });
231        }
232    }
233
234    /// Translates a live continuation stack carrying `Arc<MastForest>` references into one carrying
235    /// [`MastForestId`]s into `mast_forest_builders`.
236    fn translate_continuation_stack(
237        &mut self,
238        live: ContinuationStack<Arc<MastForest>>,
239    ) -> ContinuationStack<MastForestId> {
240        let mut translated: ContinuationStack<MastForestId> = ContinuationStack::default();
241        for cont in live.into_inner() {
242            let translated_cont = match cont {
243                Continuation::EnterForest { forest, package_debug_info } => {
244                    Continuation::EnterForest {
245                        forest: self.forest_id(&forest),
246                        package_debug_info,
247                    }
248                },
249                Continuation::StartNode(id) => Continuation::StartNode(id),
250                Continuation::FinishJoin(id) => Continuation::FinishJoin(id),
251                Continuation::FinishSplit(id) => Continuation::FinishSplit(id),
252                Continuation::FinishLoop(node_id) => Continuation::FinishLoop(node_id),
253                Continuation::FinishCall(id) => Continuation::FinishCall(id),
254                Continuation::FinishDyn(id) => Continuation::FinishDyn(id),
255                Continuation::ResumeBasicBlock { node_id, batch_index, op_idx_in_batch } => {
256                    Continuation::ResumeBasicBlock { node_id, batch_index, op_idx_in_batch }
257                },
258                Continuation::Respan { node_id, batch_index } => {
259                    Continuation::Respan { node_id, batch_index }
260                },
261                Continuation::FinishBasicBlock(id) => Continuation::FinishBasicBlock(id),
262            };
263            translated.push_continuation(translated_cont);
264        }
265        translated
266    }
267
268    /// Convert the `ExecutionTracer` into a [TraceGenerationContext] using the data accumulated
269    /// during execution.
270    #[inline(always)]
271    pub fn into_trace_generation_context(mut self) -> TraceGenerationContext {
272        // If there is an ongoing trace state being built, finish it
273        self.finish_current_fragment_context();
274
275        // Finalize each per-source-forest builder into a `SparseMastForest`. Indices stored on
276        // fragments and replays line up with the position in this vector by construction (the
277        // builders were appended in the same order the indices were assigned).
278        let mast_forest_store = self
279            .mast_forest_builders
280            .into_iter()
281            .map(|builder| Arc::new(builder.finalize()))
282            .collect();
283
284        TraceGenerationContext {
285            core_trace_contexts: self.fragment_contexts,
286            mast_forest_store,
287            range_checker_replay: self.range_checker,
288            memory_writes: self.memory_writes,
289            bitwise_replay: self.bitwise,
290            kernel_replay: self.kernel,
291            hasher_for_chiplet: self.hasher_for_chiplet,
292            ace_replay: self.ace,
293            fragment_size: self.fragment_size,
294            max_stack_depth: self.max_stack_depth,
295        }
296    }
297
298    // HELPERS
299    // -------------------------------------------------------------------------------------------
300
301    /// Captures the internal state into a new [TraceFragmentContext] (stored internally), resets
302    /// the internal replay state of the builder, and records a new state snapshot, marking the
303    /// beginning of the next trace state.
304    ///
305    /// This must be called at the beginning of a new trace fragment, before executing the first
306    /// operation. Internal replay fields are expected to be accessed during execution of this new
307    /// fragment to record data to be replayed by the trace fragment generators.
308    #[inline(always)]
309    fn start_new_fragment_context(
310        &mut self,
311        system_state: SystemState,
312        stack_top: [Felt; MIN_STACK_DEPTH],
313        mut continuation_stack: ContinuationStack<Arc<MastForest>>,
314        continuation: Continuation<Arc<MastForest>>,
315        current_forest: Arc<MastForest>,
316    ) {
317        // If there is an ongoing snapshot, finish it
318        self.finish_current_fragment_context();
319
320        // Start a new snapshot
321        let decoder_state = {
322            if self.block_stack.is_empty() {
323                DecoderState { current_addr: ZERO, parent_addr: ZERO }
324            } else {
325                let block_info = self.block_stack.peek();
326
327                DecoderState {
328                    current_addr: block_info.addr,
329                    parent_addr: block_info.parent_addr,
330                }
331            }
332        };
333        let stack = {
334            let stack_depth = MIN_STACK_DEPTH + self.overflow_table.num_elements_in_current_ctx();
335            let last_overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
336            StackState::new(stack_top, stack_depth, last_overflow_addr)
337        };
338
339        // Push new continuation corresponding to the current execution state
340        continuation_stack.push_continuation(continuation);
341
342        // Translate the live `Arc<MastForest>`-bearing continuation stack into one indexed by
343        // `MastForestId` into `mast_forest_builders`, registering any newly-encountered forests
344        // along the way.
345        let initial_mast_forest_id = self.forest_id(&current_forest);
346        let translated_stack = self.translate_continuation_stack(continuation_stack);
347
348        self.state_snapshot = Some(StateSnapshot {
349            state: CoreTraceState {
350                system: system_state,
351                decoder: decoder_state,
352                stack,
353            },
354            continuation_stack: translated_stack,
355            initial_mast_forest_id,
356        });
357    }
358
359    #[inline(always)]
360    fn record_control_node_start<P: Processor>(
361        &mut self,
362        node: &MastNode,
363        processor: &P,
364        current_forest: &MastForest,
365    ) {
366        let ctx_info = match node {
367            MastNode::Join(node) => {
368                let child1_hash = current_forest
369                    .get_node_by_id(node.first())
370                    .expect("join node's first child expected to be in the forest")
371                    .digest();
372                let child2_hash = current_forest
373                    .get_node_by_id(node.second())
374                    .expect("join node's second child expected to be in the forest")
375                    .digest();
376                self.hasher_for_chiplet.record_hash_control_block(
377                    child1_hash,
378                    child2_hash,
379                    JoinNode::DOMAIN,
380                    node.digest(),
381                );
382
383                None
384            },
385            MastNode::Split(node) => {
386                let child1_hash = current_forest
387                    .get_node_by_id(node.on_true())
388                    .expect("split node's true child expected to be in the forest")
389                    .digest();
390                let child2_hash = current_forest
391                    .get_node_by_id(node.on_false())
392                    .expect("split node's false child expected to be in the forest")
393                    .digest();
394                self.hasher_for_chiplet.record_hash_control_block(
395                    child1_hash,
396                    child2_hash,
397                    SplitNode::DOMAIN,
398                    node.digest(),
399                );
400
401                None
402            },
403            MastNode::Loop(node) => {
404                let body_hash = current_forest
405                    .get_node_by_id(node.body())
406                    .expect("loop node's body expected to be in the forest")
407                    .digest();
408
409                self.hasher_for_chiplet.record_hash_control_block(
410                    body_hash,
411                    EMPTY_WORD,
412                    LoopNode::DOMAIN,
413                    node.digest(),
414                );
415
416                None
417            },
418            MastNode::Call(node) => {
419                let callee_hash = current_forest
420                    .get_node_by_id(node.callee())
421                    .expect("call node's callee expected to be in the forest")
422                    .digest();
423
424                self.hasher_for_chiplet.record_hash_control_block(
425                    callee_hash,
426                    EMPTY_WORD,
427                    node.domain(),
428                    node.digest(),
429                );
430
431                let overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
432                Some(ExecutionContextInfo::new(
433                    processor.system().ctx(),
434                    processor.system().caller_hash(),
435                    processor.stack().depth(),
436                    overflow_addr,
437                ))
438            },
439            MastNode::Dyn(dyn_node) => {
440                self.hasher_for_chiplet.record_hash_control_block(
441                    EMPTY_WORD,
442                    EMPTY_WORD,
443                    dyn_node.domain(),
444                    dyn_node.digest(),
445                );
446
447                if dyn_node.is_dyncall() {
448                    // DYNCALL drops the top stack element (the memory address holding the
449                    // callee hash) and records the stack state *after* the drop as the new
450                    // context.
451                    //
452                    // `record_control_node_start()` is called *before* `decrement_stack_size()`,
453                    // so we must compute the post-drop overflow address without actually
454                    // performing the pop.  We use `clk_after_pop_in_current_ctx()` which
455                    // returns the clock of the second-to-last overflow entry (i.e. what
456                    // `last_update_clk_in_current_ctx()` would return after the pop), or ZERO
457                    // when the overflow stack has ≤1 entry and would become empty.
458                    //
459                    // When the stack is already at MIN_STACK_DEPTH the drop does not reduce
460                    // the depth and the overflow address is ZERO — mirroring the same guard
461                    // already present in the parallel-tracer path.  See #2813 / PR #2904.
462                    let (stack_depth_after_drop, overflow_addr) =
463                        if processor.stack().depth() > MIN_STACK_DEPTH as u32 {
464                            (
465                                processor.stack().depth() - 1,
466                                self.overflow_table.clk_after_pop_in_current_ctx(),
467                            )
468                        } else {
469                            (processor.stack().depth(), ZERO)
470                        };
471                    Some(ExecutionContextInfo::new(
472                        processor.system().ctx(),
473                        processor.system().caller_hash(),
474                        stack_depth_after_drop,
475                        overflow_addr,
476                    ))
477                } else {
478                    None
479                }
480            },
481            MastNode::Block(_) => panic!(
482                "`ExecutionTracer::record_basic_block_start()` must be called instead for basic blocks"
483            ),
484            MastNode::External(_) => panic!(
485                "External nodes are guaranteed to be resolved before record_control_node_start is called"
486            ),
487        };
488
489        let block_addr = self.hasher_chiplet_shim.record_hash_control_block();
490        let parent_addr = self.block_stack.push(block_addr, ctx_info);
491        self.block_stack_replay.record_node_start_parent_addr(parent_addr);
492    }
493
494    /// Records the block address for an END operation based on the block being popped.
495    #[inline(always)]
496    fn record_node_end(&mut self, block_info: &BlockInfo) {
497        let (prev_addr, prev_parent_addr) = if self.block_stack.is_empty() {
498            (ZERO, ZERO)
499        } else {
500            let prev_block = self.block_stack.peek();
501            (prev_block.addr, prev_block.parent_addr)
502        };
503        self.block_stack_replay
504            .record_node_end(block_info.addr, prev_addr, prev_parent_addr);
505    }
506
507    /// Records the execution context system info for CALL/SYSCALL/DYNCALL operations.
508    #[inline(always)]
509    fn record_execution_context(&mut self, ctx_info: ExecutionContextSystemInfo) {
510        self.execution_context_replay.record_execution_context(ctx_info);
511    }
512
513    /// Records the current core trace state, if any.
514    ///
515    /// Specifically, extracts the stored [SnapshotStart] as well as all the replay data recorded
516    /// from the various components (e.g. memory, advice, etc) since the last call to this method.
517    /// Resets the internal state to default values to prepare for the next trace fragment.
518    ///
519    /// Note that the very first time that this is called (at clock cycle 0), the snapshot will not
520    /// contain any replay data, and so no core trace state will be recorded.
521    #[inline(always)]
522    fn finish_current_fragment_context(&mut self) {
523        if let Some(snapshot) = self.state_snapshot.take() {
524            // Extract the replays
525            let (hasher_replay, block_addr_replay) = self.hasher_chiplet_shim.extract_replay();
526            let memory_reads_replay = core::mem::take(&mut self.memory_reads);
527            let advice_replay = core::mem::take(&mut self.advice);
528            let external_replay = core::mem::take(&mut self.external);
529            let stack_overflow_replay = core::mem::take(&mut self.overflow_replay);
530            let block_stack_replay = core::mem::take(&mut self.block_stack_replay);
531            let execution_context_replay = core::mem::take(&mut self.execution_context_replay);
532
533            let trace_state = CoreTraceFragmentContext {
534                state: snapshot.state,
535                replay: ExecutionReplay {
536                    hasher: hasher_replay,
537                    block_address: block_addr_replay,
538                    memory_reads: memory_reads_replay,
539                    advice: advice_replay,
540                    mast_forest_resolution: external_replay,
541                    stack_overflow: stack_overflow_replay,
542                    block_stack: block_stack_replay,
543                    execution_context: execution_context_replay,
544                },
545                continuation: snapshot.continuation_stack,
546                initial_mast_forest_id: snapshot.initial_mast_forest_id,
547            };
548
549            self.fragment_contexts.push(trace_state);
550        }
551    }
552
553    /// Pushes the value at stack position 15 onto the overflow table. This must be called in
554    /// `Tracer::start_clock_cycle()` *before* the processor increments the stack size, where stack
555    /// position 15 at the start of the clock cycle corresponds to the element that overflows.
556    #[inline(always)]
557    fn increment_stack_size(&mut self, processor: &FastProcessor) {
558        let new_overflow_value = processor.stack_get(15);
559        self.overflow_table.push(new_overflow_value, processor.system().clock());
560    }
561
562    /// Pops a value from the overflow table and records it for replay.
563    #[inline(always)]
564    fn decrement_stack_size(&mut self) {
565        if let Some(popped_value) = self.overflow_table.pop() {
566            let new_overflow_addr = self.overflow_table.last_update_clk_in_current_ctx();
567            self.overflow_replay.record_pop_overflow(popped_value, new_overflow_addr);
568        }
569    }
570}
571
572impl Tracer for ExecutionTracer {
573    type Processor = FastProcessor;
574    type Forest = Arc<MastForest>;
575
576    /// When sufficiently many clock cycles have elapsed, starts a new trace state. Also updates the
577    /// internal block stack.
578    #[inline(always)]
579    fn start_clock_cycle(
580        &mut self,
581        processor: &FastProcessor,
582        continuation: Continuation<Arc<MastForest>>,
583        continuation_stack: &ContinuationStack<Arc<MastForest>>,
584        current_forest: &Arc<MastForest>,
585    ) {
586        // check if we need to start a new trace state
587        if processor.system().clock().as_usize().is_multiple_of(self.fragment_size) {
588            self.start_new_fragment_context(
589                SystemState::from_processor(processor),
590                processor
591                    .stack_top()
592                    .try_into()
593                    .expect("stack_top expected to be MIN_STACK_DEPTH elements"),
594                continuation_stack.clone(),
595                continuation.clone(),
596                current_forest.clone(),
597            );
598        }
599
600        // Record that the node being executed at this cycle was visited inside `current_forest`.
601        // This builds up the per-source-forest sparse subset used at trace generation time. Note
602        // that an `ExecutionTracer`'s state is invalidated on error, so it is safe to record the
603        // visit here even if the operation later fails (per Tracer invariants).
604        if let Some(visited_node_id) = node_id_for_visit(&continuation) {
605            self.record_visit(current_forest, visited_node_id);
606        }
607
608        match continuation {
609            Continuation::ResumeBasicBlock { node_id, batch_index, op_idx_in_batch } => {
610                // Update overflow table based on whether the operation increments or decrements
611                // the stack size.
612                let basic_block = current_forest[node_id].unwrap_basic_block();
613                let op = &basic_block.op_batches()[batch_index].ops()[op_idx_in_batch];
614
615                if op.increments_stack_size() {
616                    self.increment_stack_size(processor);
617                } else if op.decrements_stack_size() {
618                    self.decrement_stack_size();
619                }
620
621                if matches!(op, Operation::EvalCircuit) {
622                    self.is_eval_circuit_op = true;
623                }
624            },
625            Continuation::StartNode(mast_node_id) => match &current_forest[mast_node_id] {
626                MastNode::Join(_) | MastNode::Loop(_) => {
627                    self.record_control_node_start(
628                        &current_forest[mast_node_id],
629                        processor,
630                        current_forest,
631                    );
632                },
633                MastNode::Split(_) => {
634                    self.record_control_node_start(
635                        &current_forest[mast_node_id],
636                        processor,
637                        current_forest,
638                    );
639                    self.decrement_stack_size();
640                },
641                MastNode::Call(_) => {
642                    self.record_control_node_start(
643                        &current_forest[mast_node_id],
644                        processor,
645                        current_forest,
646                    );
647                    self.overflow_table.start_context();
648                },
649                MastNode::Dyn(dyn_node) => {
650                    self.record_control_node_start(
651                        &current_forest[mast_node_id],
652                        processor,
653                        current_forest,
654                    );
655                    // DYN and DYNCALL both drop the memory address from the stack.
656                    self.decrement_stack_size();
657
658                    if dyn_node.is_dyncall() {
659                        // Note: the overflow pop (stack size decrement above) must happen before
660                        // starting the new context so that it operates on the old context's
661                        // overflow table, per the semantics of dyncall.
662                        self.overflow_table.start_context();
663                    }
664                },
665                MastNode::Block(basic_block_node) => {
666                    let forest_id = self.forest_id(current_forest);
667                    self.hasher_for_chiplet.record_hash_basic_block(
668                        forest_id,
669                        mast_node_id,
670                        basic_block_node.digest(),
671                    );
672                    let block_addr =
673                        self.hasher_chiplet_shim.record_hash_basic_block(basic_block_node);
674                    let parent_addr = self.block_stack.push(block_addr, None);
675                    self.block_stack_replay.record_node_start_parent_addr(parent_addr);
676                },
677                MastNode::External(_) => unreachable!(
678                    "start_clock_cycle is guaranteed not to be called on external nodes"
679                ),
680            },
681            Continuation::Respan { node_id: _, batch_index: _ } => {
682                self.block_stack.peek_mut().addr += CONTROLLER_ROWS_PER_PERM_FELT;
683            },
684            Continuation::FinishLoop(_) if processor.stack_get(0) == ONE => {
685                // This is a REPEAT operation, which drops the condition (top element) off the stack
686                self.decrement_stack_size();
687            },
688            Continuation::FinishJoin(_)
689            | Continuation::FinishSplit(_)
690            | Continuation::FinishCall(_)
691            | Continuation::FinishDyn(_)
692            | Continuation::FinishLoop(_) // not a REPEAT, which is handled separately above
693            | Continuation::FinishBasicBlock(_) => {
694                // The END of a loop drops the condition from the stack (the body always pushes it).
695                if matches!(
696                    &continuation,
697                    Continuation::FinishLoop(_)
698                ) {
699                    self.decrement_stack_size();
700                }
701
702                // This is an END operation; pop the block stack and record the node end
703                let block_info = self.block_stack.pop();
704                self.record_node_end(&block_info);
705
706                if let Some(ctx_info) = block_info.ctx_info {
707                    self.record_execution_context(ExecutionContextSystemInfo {
708                        parent_ctx: ctx_info.parent_ctx,
709                        parent_fn_hash: ctx_info.parent_fn_hash,
710                    });
711
712                    self.pending_restore_context = true;
713                }
714            },
715            Continuation::EnterForest { .. } => {
716                panic!("EnterForest continuations are guaranteed not to be passed here")
717            },
718        }
719    }
720
721    #[inline(always)]
722    fn record_mast_forest_resolution(&mut self, node_id: MastNodeId, forest: &Arc<MastForest>) {
723        let forest_id = self.forest_id(forest);
724        self.external.record_resolution(node_id, forest_id);
725    }
726
727    #[inline(always)]
728    fn record_external_node_entered(
729        &mut self,
730        external_node_id: MastNodeId,
731        forest: &Arc<MastForest>,
732    ) {
733        // External nodes don't go through `start_clock_cycle`, so we record their visit here so
734        // the per-source-forest sparse builder includes them. The replay path needs the external
735        // node present in the sparse forest because `execute_external_node` indexes the forest by
736        // id to fetch the procedure digest.
737        self.record_visit(forest, external_node_id);
738    }
739
740    #[inline(always)]
741    fn record_hasher_permute(
742        &mut self,
743        input_state: [Felt; STATE_WIDTH],
744        output_state: [Felt; STATE_WIDTH],
745    ) {
746        self.hasher_for_chiplet.record_permute_input(input_state);
747        self.hasher_chiplet_shim.record_permute_output(output_state);
748    }
749
750    #[inline(always)]
751    fn record_hasher_build_merkle_root(
752        &mut self,
753        node: Word,
754        path: Option<&MerklePath>,
755        index: Felt,
756        output_root: Word,
757    ) {
758        let path = path.expect("execution tracer expects a valid Merkle path");
759        self.hasher_chiplet_shim.record_build_merkle_root(path, output_root);
760        self.hasher_for_chiplet.record_build_merkle_root(node, path.clone(), index);
761    }
762
763    #[inline(always)]
764    fn record_hasher_update_merkle_root(
765        &mut self,
766        old_value: Word,
767        new_value: Word,
768        path: Option<&MerklePath>,
769        index: Felt,
770        old_root: Word,
771        new_root: Word,
772    ) {
773        let path = path.expect("execution tracer expects a valid Merkle path");
774        self.hasher_chiplet_shim.record_update_merkle_root(path, old_root, new_root);
775        self.hasher_for_chiplet.record_update_merkle_root(
776            old_value,
777            new_value,
778            path.clone(),
779            index,
780        );
781    }
782
783    #[inline(always)]
784    fn record_memory_read_element(
785        &mut self,
786        element: Felt,
787        addr: Felt,
788        ctx: ContextId,
789        clk: RowIndex,
790    ) {
791        self.memory_reads.record_read_element(element, addr, ctx, clk);
792    }
793
794    #[inline(always)]
795    fn record_memory_read_word(&mut self, word: Word, addr: Felt, ctx: ContextId, clk: RowIndex) {
796        self.memory_reads.record_read_word(word, addr, ctx, clk);
797    }
798
799    #[inline(always)]
800    fn record_memory_write_element(
801        &mut self,
802        element: Felt,
803        addr: Felt,
804        ctx: ContextId,
805        clk: RowIndex,
806    ) {
807        self.memory_writes.record_write_element(element, addr, ctx, clk);
808    }
809
810    #[inline(always)]
811    fn record_memory_write_word(&mut self, word: Word, addr: Felt, ctx: ContextId, clk: RowIndex) {
812        self.memory_writes.record_write_word(word, addr, ctx, clk);
813    }
814
815    #[inline(always)]
816    fn record_memory_read_element_pair(
817        &mut self,
818        element_0: Felt,
819        addr_0: Felt,
820        element_1: Felt,
821        addr_1: Felt,
822        ctx: ContextId,
823        clk: RowIndex,
824    ) {
825        self.memory_reads.record_read_element(element_0, addr_0, ctx, clk);
826        self.memory_reads.record_read_element(element_1, addr_1, ctx, clk);
827    }
828
829    #[inline(always)]
830    fn record_memory_read_dword(
831        &mut self,
832        words: [Word; 2],
833        addr: Felt,
834        ctx: ContextId,
835        clk: RowIndex,
836    ) {
837        self.memory_reads.record_read_word(words[0], addr, ctx, clk);
838        self.memory_reads.record_read_word(words[1], addr + PTR_OFFSET_WORD, ctx, clk);
839    }
840
841    #[inline(always)]
842    fn record_dyncall_memory(
843        &mut self,
844        callee_hash: Word,
845        read_addr: Felt,
846        read_ctx: ContextId,
847        fmp_ctx: ContextId,
848        clk: RowIndex,
849    ) {
850        self.memory_reads.record_read_word(callee_hash, read_addr, read_ctx, clk);
851        self.memory_writes.record_write_element(FMP_INIT_VALUE, FMP_ADDR, fmp_ctx, clk);
852    }
853
854    #[inline(always)]
855    fn record_crypto_stream(
856        &mut self,
857        plaintext: [Word; 2],
858        src_addr: Felt,
859        ciphertext: [Word; 2],
860        dst_addr: Felt,
861        ctx: ContextId,
862        clk: RowIndex,
863    ) {
864        self.memory_reads.record_read_word(plaintext[0], src_addr, ctx, clk);
865        self.memory_reads
866            .record_read_word(plaintext[1], src_addr + PTR_OFFSET_WORD, ctx, clk);
867        self.memory_writes.record_write_word(ciphertext[0], dst_addr, ctx, clk);
868        self.memory_writes
869            .record_write_word(ciphertext[1], dst_addr + PTR_OFFSET_WORD, ctx, clk);
870    }
871
872    #[inline(always)]
873    fn record_pipe(&mut self, words: [Word; 2], addr: Felt, ctx: ContextId, clk: RowIndex) {
874        self.advice.record_pop_stack_dword(words);
875        self.memory_writes.record_write_word(words[0], addr, ctx, clk);
876        self.memory_writes.record_write_word(words[1], addr + PTR_OFFSET_WORD, ctx, clk);
877    }
878
879    #[inline(always)]
880    fn record_advice_pop_stack(&mut self, value: Felt) {
881        self.advice.record_pop_stack(value);
882    }
883
884    #[inline(always)]
885    fn record_advice_pop_stack_word(&mut self, word: Word) {
886        self.advice.record_pop_stack_word(word);
887    }
888
889    #[inline(always)]
890    fn record_u32and(&mut self, a: Felt, b: Felt) {
891        self.bitwise.record_u32and(a, b);
892    }
893
894    #[inline(always)]
895    fn record_u32xor(&mut self, a: Felt, b: Felt) {
896        self.bitwise.record_u32xor(a, b);
897    }
898
899    #[inline(always)]
900    fn record_u32_range_checks(&mut self, u32_lo: Felt, u32_hi: Felt) {
901        let (t1, t0) = split_u32_into_u16(u32_lo.as_canonical_u64());
902        let (t3, t2) = split_u32_into_u16(u32_hi.as_canonical_u64());
903
904        self.range_checker.record_range_check_u32([t0, t1, t2, t3]);
905    }
906
907    #[inline(always)]
908    fn record_kernel_proc_access(&mut self, proc_hash: Word) {
909        self.kernel.record_kernel_proc_access(proc_hash);
910    }
911
912    #[inline(always)]
913    fn record_circuit_evaluation(&mut self, circuit_evaluation: CircuitEvaluation) {
914        self.ace.record_circuit_evaluation(circuit_evaluation);
915    }
916
917    #[inline(always)]
918    fn finalize_clock_cycle(
919        &mut self,
920        processor: &FastProcessor,
921        _op_helper_registers: OperationHelperRegisters,
922        _current_forest: &Arc<MastForest>,
923    ) {
924        // Restore the overflow table context for Call/Syscall/Dyncall END. This is deferred
925        // from start_clock_cycle because finalize_clock_cycle is only called when the operation
926        // succeeds (i.e., the stack depth check in processor.restore_context() passes).
927        if self.pending_restore_context {
928            // Restore context for call/syscall/dyncall: pop the current context's
929            // (empty) overflow stack and restore the previous context's overflow state.
930            self.overflow_table.restore_context();
931            self.overflow_replay.record_restore_context_overflow_addr(
932                MIN_STACK_DEPTH + self.overflow_table.num_elements_in_current_ctx(),
933                self.overflow_table.last_update_clk_in_current_ctx(),
934            );
935
936            self.pending_restore_context = false;
937        }
938
939        // Record all memory reads performed during EvalCircuit operations. We run this in
940        // `finalize_clock_cycle` to ensure that the memory reads are only recorded if the operation
941        // succeeds (and hence the values read from the stack can be assumed to be valid).
942        if self.is_eval_circuit_op {
943            let ptr = processor.stack_get(0);
944            let num_read = processor.stack_get(1).as_canonical_u64();
945            let num_eval = processor.stack_get(2).as_canonical_u64();
946            let ctx = processor.ctx();
947            let clk = processor.clock();
948
949            let num_read_rows = num_read / 2;
950
951            let mut addr = ptr;
952            for _ in 0..num_read_rows {
953                let word = processor
954                    .memory()
955                    .read_word(ctx, addr, clk)
956                    .expect("EvalCircuit memory read should not fail after successful execution");
957                self.memory_reads.record_read_word(word, addr, ctx, clk);
958                addr += PTR_OFFSET_WORD;
959            }
960            for _ in 0..num_eval {
961                let element = processor
962                    .memory()
963                    .read_element(ctx, addr)
964                    .expect("EvalCircuit memory read should not fail after successful execution");
965                self.memory_reads.record_read_element(element, addr, ctx, clk);
966                addr += PTR_OFFSET_ELEM;
967            }
968
969            self.is_eval_circuit_op = false;
970        }
971    }
972}
973
974/// Extracts the [`MastNodeId`] that is being executed at the start of a clock cycle from the given
975/// continuation, so that the node can be marked as visited in its source forest's sparse builder.
976///
977/// Continuations not passed to [`Tracer::start_clock_cycle`] (per the trait contract) are matched
978/// pessimistically here, returning `None`.
979#[inline]
980fn node_id_for_visit<F>(continuation: &Continuation<F>) -> Option<MastNodeId> {
981    match *continuation {
982        Continuation::StartNode(id)
983        | Continuation::FinishJoin(id)
984        | Continuation::FinishSplit(id)
985        | Continuation::FinishCall(id)
986        | Continuation::FinishDyn(id)
987        | Continuation::FinishBasicBlock(id) => Some(id),
988        Continuation::FinishLoop(id) => Some(id),
989        Continuation::ResumeBasicBlock { node_id, .. } | Continuation::Respan { node_id, .. } => {
990            Some(node_id)
991        },
992        Continuation::EnterForest { .. } => None,
993    }
994}
995
996// HASHER CHIPLET SHIM
997// ================================================================================================
998
999/// The number of controller rows per permutation request (input + output = 2), as u32.
1000const NUM_HASHER_ROWS_PER_PERMUTATION: u32 = CONTROLLER_ROWS_PER_PERMUTATION as u32;
1001
1002/// Implements a shim for the hasher chiplet, where the responses of the hasher chiplet are emulated
1003/// and recorded for later replay.
1004///
1005/// This is used to simulate hasher operations in parallel trace generation without needing to
1006/// actually generate the hasher trace. All hasher operations are recorded during fast execution and
1007/// then replayed during core trace generation.
1008#[derive(Debug)]
1009pub struct HasherChipletShim {
1010    /// The address of the next MAST node encountered during execution. This field is used to keep
1011    /// track of the number of rows in the hasher chiplet, from which the address of the next MAST
1012    /// node is derived.
1013    addr: u32,
1014    /// Replay for the hasher chiplet responses, recording only the hasher chiplet responses.
1015    hasher_replay: HasherResponseReplay,
1016    block_addr_replay: BlockAddressReplay,
1017}
1018
1019impl HasherChipletShim {
1020    /// Creates a new [HasherChipletShim].
1021    pub fn new() -> Self {
1022        Self {
1023            addr: 1,
1024            hasher_replay: HasherResponseReplay::default(),
1025            block_addr_replay: BlockAddressReplay::default(),
1026        }
1027    }
1028
1029    /// Records the address returned from a call to `Hasher::hash_control_block()`.
1030    pub fn record_hash_control_block(&mut self) -> Felt {
1031        let block_addr = Felt::from_u32(self.addr);
1032
1033        self.block_addr_replay.record_block_address(block_addr);
1034        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION;
1035
1036        block_addr
1037    }
1038
1039    /// Records the address returned from a call to `Hasher::hash_basic_block()`.
1040    pub fn record_hash_basic_block(&mut self, basic_block_node: &BasicBlockNode) -> Felt {
1041        let block_addr = Felt::from_u32(self.addr);
1042
1043        self.block_addr_replay.record_block_address(block_addr);
1044        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION * basic_block_node.num_op_batches() as u32;
1045
1046        block_addr
1047    }
1048    /// Records the result of a call to `Hasher::permute()`.
1049    pub fn record_permute_output(&mut self, hashed_state: [Felt; 12]) {
1050        self.hasher_replay.record_permute(Felt::from_u32(self.addr), hashed_state);
1051        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION;
1052    }
1053
1054    /// Records the result of a call to `Hasher::build_merkle_root()`.
1055    pub fn record_build_merkle_root(&mut self, path: &MerklePath, computed_root: Word) {
1056        self.hasher_replay
1057            .record_build_merkle_root(Felt::from_u32(self.addr), computed_root);
1058        self.addr += NUM_HASHER_ROWS_PER_PERMUTATION * path.depth() as u32;
1059    }
1060
1061    /// Records the result of a call to `Hasher::update_merkle_root()`.
1062    pub fn record_update_merkle_root(&mut self, path: &MerklePath, old_root: Word, new_root: Word) {
1063        self.hasher_replay
1064            .record_update_merkle_root(Felt::from_u32(self.addr), old_root, new_root);
1065
1066        // The Merkle path is verified twice: once for the old root and once for the new root.
1067        self.addr += 2 * NUM_HASHER_ROWS_PER_PERMUTATION * path.depth() as u32;
1068    }
1069
1070    pub fn extract_replay(&mut self) -> (HasherResponseReplay, BlockAddressReplay) {
1071        (
1072            core::mem::take(&mut self.hasher_replay),
1073            core::mem::take(&mut self.block_addr_replay),
1074        )
1075    }
1076}
1077
1078impl Default for HasherChipletShim {
1079    fn default() -> Self {
1080        Self::new()
1081    }
1082}