Skip to main content

miden_processor/trace/
utils.rs

1#[cfg(test)]
2use alloc::vec::Vec;
3
4use miden_air::trace::MIN_TRACE_LEN;
5
6use super::chiplets::Chiplets;
7use crate::{Felt, ONE};
8#[cfg(test)]
9use crate::{operation::Operation, utils::ToElements};
10
11// ROW-MAJOR TRACE WRITER
12// ================================================================================================
13
14/// Row-major flat buffer writer (`write_row` is a single `copy_from_slice`).
15///
16/// `payload` is the number of leading columns written per row; `stride` is the physical row
17/// width of the backing buffer. When `stride > payload`, the trailing `stride - payload`
18/// columns of each row are left untouched (callers rely on them staying zero-initialized).
19#[derive(Debug)]
20pub struct RowMajorTraceWriter<'a, E> {
21    data: &'a mut [E],
22    payload: usize,
23    stride: usize,
24}
25
26impl<'a, E: Copy> RowMajorTraceWriter<'a, E> {
27    /// Creates a writer whose physical row width equals the per-row payload.
28    #[cfg(test)]
29    pub fn new(data: &'a mut [E], width: usize) -> Self {
30        Self::with_stride(data, width, width)
31    }
32
33    /// Creates a writer that writes `payload` columns per row into a buffer with physical row
34    /// width `stride` (`stride >= payload`).
35    pub fn with_stride(data: &'a mut [E], payload: usize, stride: usize) -> Self {
36        debug_assert!(stride >= payload, "stride must be >= payload");
37        debug_assert_eq!(data.len() % stride, 0, "buffer length must be a multiple of stride");
38        Self { data, payload, stride }
39    }
40
41    /// Writes one row's payload; `values.len()` must equal `payload`.
42    #[inline(always)]
43    pub fn write_row(&mut self, row: usize, values: &[E]) {
44        debug_assert_eq!(values.len(), self.payload);
45        let start = row * self.stride;
46        self.data[start..start + self.payload].copy_from_slice(values);
47    }
48}
49
50// TRACE FRAGMENT
51// ================================================================================================
52
53/// Physical column of the `s_00` permutation selector within the chiplets trace.
54pub const S_00_COL: usize = 0;
55
56/// Physical column of the `s_01` controller selector within the chiplets trace.
57pub const S_01_COL: usize = 1;
58
59/// Physical column of the `chip_clk` counter within the chiplets trace.
60pub const CHIP_CLK_COL: usize = 2;
61
62/// Physical column where the chiplet data band begins.
63pub const DATA_COL_START: usize = 3;
64
65/// A writable, row-major view over one chiplet's region of the chiplets trace.
66///
67/// A chiplet occupies a contiguous band of rows and a contiguous band of columns
68/// `[col_start, col_start + num_cols)`. [`Self::copy_rows_from`] also writes the per-row
69/// `prefix_one_cols` selectors and the `chip_clk` column at [`CHIP_CLK_COL`].
70///
71/// When `scatter_last` is set, the final source column of each row is written to that physical
72/// column instead of the contiguous band; the hasher uses this to place its `s_00` selector at
73/// [`S_00_COL`] while its remaining columns stay contiguous.
74pub struct ChipletTraceFragment<'a> {
75    /// Contiguous `num_rows * stride` row-major slice (this chiplet's rows).
76    band: &'a mut [Felt],
77    stride: usize,
78    col_start: usize,
79    num_rows: usize,
80    num_cols: usize,
81    /// Global row offset of `band[0]` in the chiplets trace; used to compute `chip_clk`.
82    row_offset: usize,
83    /// Columns to set to ONE on every row in this band.
84    prefix_one_cols: &'static [usize],
85    /// Physical column for `chip_clk`; written every row when `write_clk` is set.
86    clk_col: usize,
87    /// When set, write `chip_clk` at `clk_col`.
88    write_clk: bool,
89    /// When set, route the last source column of each row to this physical column.
90    scatter_last: Option<usize>,
91}
92
93impl<'a> ChipletTraceFragment<'a> {
94    /// Bare fragment with no prefix selectors or `chip_clk`. For chiplet-level unit tests.
95    pub fn row_major(
96        band: &'a mut [Felt],
97        stride: usize,
98        col_start: usize,
99        num_cols: usize,
100    ) -> Self {
101        debug_assert_eq!(band.len() % stride, 0, "band length must be a multiple of stride");
102        debug_assert!(col_start + num_cols <= stride, "column band overruns the row stride");
103        let num_rows = band.len() / stride;
104        Self {
105            band,
106            stride,
107            col_start,
108            num_rows,
109            num_cols,
110            row_offset: 0,
111            prefix_one_cols: &[],
112            clk_col: CHIP_CLK_COL,
113            write_clk: false,
114            scatter_last: None,
115        }
116    }
117
118    /// Adds the chiplets-trace overheads: per-row ONEs at `prefix_one_cols` and `chip_clk` at
119    /// [`CHIP_CLK_COL`], using `row_offset` as `band[0]`'s global row.
120    pub fn with_overheads(
121        band: &'a mut [Felt],
122        stride: usize,
123        col_start: usize,
124        num_cols: usize,
125        row_offset: usize,
126        prefix_one_cols: &'static [usize],
127    ) -> Self {
128        debug_assert_eq!(band.len() % stride, 0, "band length must be a multiple of stride");
129        debug_assert!(col_start + num_cols <= stride, "column band overruns the row stride");
130        let num_rows = band.len() / stride;
131        Self {
132            band,
133            stride,
134            col_start,
135            num_rows,
136            num_cols,
137            row_offset,
138            prefix_one_cols,
139            clk_col: CHIP_CLK_COL,
140            write_clk: true,
141            scatter_last: None,
142        }
143    }
144
145    /// Like [`Self::with_overheads`], but routes the last source column of each row to the
146    /// `scatter_last` physical column instead of the contiguous band.
147    pub fn with_scattered_last(
148        band: &'a mut [Felt],
149        stride: usize,
150        col_start: usize,
151        num_cols: usize,
152        row_offset: usize,
153        scatter_last: usize,
154    ) -> Self {
155        debug_assert_eq!(band.len() % stride, 0, "band length must be a multiple of stride");
156        debug_assert!(num_cols >= 1, "scattered fragment needs at least one column");
157        debug_assert!(col_start + num_cols - 1 <= stride, "column band overruns the row stride",);
158        let num_rows = band.len() / stride;
159        Self {
160            band,
161            stride,
162            col_start,
163            num_rows,
164            num_cols,
165            row_offset,
166            prefix_one_cols: &[],
167            clk_col: CHIP_CLK_COL,
168            write_clk: true,
169            scatter_last: Some(scatter_last),
170        }
171    }
172
173    // PUBLIC ACCESSORS
174    // --------------------------------------------------------------------------------------------
175
176    /// Returns the number of columns in this execution trace fragment.
177    pub fn width(&self) -> usize {
178        self.num_cols
179    }
180
181    /// Returns the number of rows in this execution trace fragment.
182    pub fn len(&self) -> usize {
183        self.num_rows
184    }
185
186    /// Sets the `s_01` controller selector to [`ONE`] on `row`.
187    ///
188    /// No-op when this fragment has no prefix space (`col_start < DATA_COL_START`), i.e. the band
189    /// starts inside the prefix columns.
190    pub fn set_s_01(&mut self, row: usize) {
191        if self.col_start < DATA_COL_START {
192            return;
193        }
194        self.band[row * self.stride + S_01_COL] = ONE;
195    }
196
197    // DATA MUTATORS
198    // --------------------------------------------------------------------------------------------
199
200    /// Copies a chiplet's row-major buffer (`num_cols` cells per row) into this fragment's
201    /// band, fusing the per-row prefix-selector ONEs and trailing `chip_clk` when configured.
202    pub fn copy_rows_from(&mut self, src: &[Felt]) {
203        debug_assert_eq!(src.len(), self.num_rows * self.num_cols, "source buffer size mismatch");
204        self.copy_rows_into(0, src);
205    }
206
207    /// Copies `src.len() / num_cols` rows starting at `row_offset` into this fragment's band,
208    /// fusing the per-row prefix-selector ONEs and the `chip_clk` column when configured.
209    pub fn copy_rows_into(&mut self, row_offset: usize, src: &[Felt]) {
210        debug_assert_eq!(src.len() % self.num_cols, 0, "source buffer size not row-aligned");
211        let chunk_rows = src.len() / self.num_cols;
212        debug_assert!(
213            row_offset + chunk_rows <= self.num_rows,
214            "chunk overruns fragment row range",
215        );
216        // The number of leading source columns written contiguously; the scattered last column
217        // (when present) is routed separately.
218        let contiguous_cols = match self.scatter_last {
219            Some(_) => self.num_cols - 1,
220            None => self.num_cols,
221        };
222        for r in 0..chunk_rows {
223            let dst_row = row_offset + r;
224            let row_start = dst_row * self.stride;
225            let row = &mut self.band[row_start..row_start + self.stride];
226            for &col in self.prefix_one_cols {
227                row[col] = ONE;
228            }
229            let src_row = &src[r * self.num_cols..(r + 1) * self.num_cols];
230            row[self.col_start..self.col_start + contiguous_cols]
231                .copy_from_slice(&src_row[..contiguous_cols]);
232            if let Some(scatter_col) = self.scatter_last {
233                row[scatter_col] = src_row[self.num_cols - 1];
234            }
235            if self.write_clk {
236                row[self.clk_col] = Felt::from_u32((self.row_offset + dst_row + 1) as u32);
237            }
238        }
239    }
240}
241
242// TRACE LENGTH SUMMARY
243// ================================================================================================
244
245/// Contains the data about lengths of the trace parts.
246///
247/// - `core_trace_len` contains the length of the core trace (system + decoder + stack).
248/// - `range_trace_len` contains the length of the range checker trace.
249/// - `chiplets_trace_len` contains the trace lengths of the all chiplets (hash, bitwise, memory,
250///   kernel ROM)
251#[derive(Debug, Default, Eq, PartialEq, Clone, Copy)]
252pub struct TraceLenSummary {
253    core_trace_len: usize,
254    range_trace_len: usize,
255    chiplets_trace_len: ChipletsLengths,
256    /// Set by the trace builder when known. `None` falls back to deriving from the
257    /// unpadded component lengths via `next_power_of_two`.
258    padded_trace_len: Option<usize>,
259}
260
261impl TraceLenSummary {
262    pub fn new(
263        core_trace_len: usize,
264        range_trace_len: usize,
265        chiplets_trace_len: ChipletsLengths,
266    ) -> Self {
267        TraceLenSummary {
268            core_trace_len,
269            range_trace_len,
270            chiplets_trace_len,
271            padded_trace_len: None,
272        }
273    }
274
275    /// Like `new` but with the actual padded trace length supplied by the trace builder
276    /// (under per-AIR heights this is `max(core_height, chiplets_height)`, not a
277    /// single `next_power_of_two(max(...))`).
278    pub fn new_with_padded(
279        core_trace_len: usize,
280        range_trace_len: usize,
281        chiplets_trace_len: ChipletsLengths,
282        padded_trace_len: usize,
283    ) -> Self {
284        TraceLenSummary {
285            core_trace_len,
286            range_trace_len,
287            chiplets_trace_len,
288            padded_trace_len: Some(padded_trace_len),
289        }
290    }
291
292    /// Returns length of the core trace (system + decoder + stack).
293    pub fn core_trace_len(&self) -> usize {
294        self.core_trace_len
295    }
296
297    /// Returns length of the range checker trace.
298    pub fn range_trace_len(&self) -> usize {
299        self.range_trace_len
300    }
301
302    /// Returns [ChipletsLengths] which contains trace lengths of all chilplets.
303    pub fn chiplets_trace_len(&self) -> ChipletsLengths {
304        self.chiplets_trace_len
305    }
306
307    /// Returns the maximum of all component lengths.
308    pub fn trace_len(&self) -> usize {
309        self.range_trace_len
310            .max(self.core_trace_len)
311            .max(self.chiplets_trace_len.trace_len())
312    }
313
314    /// Returns `trace_len` rounded up to the next power of two, clamped to `MIN_TRACE_LEN`.
315    pub fn padded_trace_len(&self) -> usize {
316        self.padded_trace_len
317            .unwrap_or_else(|| self.trace_len().next_power_of_two().max(MIN_TRACE_LEN))
318    }
319
320    /// Returns the percent (0 - 100) of the steps that were added to the trace to pad it to the
321    /// next power of tow.
322    pub fn padding_percentage(&self) -> usize {
323        (self.padded_trace_len() - self.trace_len()) * 100 / self.padded_trace_len()
324    }
325}
326
327// CHIPLET LENGTHS
328// ================================================================================================
329
330/// Contains trace lengths of all chiplets: hash, bitwise, memory, ACE, and kernel ROM.
331#[derive(Default, Clone, Copy, Debug, PartialEq, Eq)]
332pub struct ChipletsLengths {
333    hash_chiplet_len: usize,
334    bitwise_chiplet_len: usize,
335    memory_chiplet_len: usize,
336    ace_chiplet_len: usize,
337    kernel_rom_len: usize,
338}
339
340impl ChipletsLengths {
341    pub fn new(chiplets: &Chiplets) -> Self {
342        ChipletsLengths {
343            hash_chiplet_len: chiplets.bitwise_start().into(),
344            bitwise_chiplet_len: chiplets.memory_start() - chiplets.bitwise_start(),
345            memory_chiplet_len: chiplets.ace_start() - chiplets.memory_start(),
346            ace_chiplet_len: chiplets.kernel_rom_start() - chiplets.ace_start(),
347            kernel_rom_len: chiplets.padding_start() - chiplets.kernel_rom_start(),
348        }
349    }
350
351    pub fn from_parts(
352        hash_len: usize,
353        bitwise_len: usize,
354        memory_len: usize,
355        ace_len: usize,
356        kernel_len: usize,
357    ) -> Self {
358        ChipletsLengths {
359            hash_chiplet_len: hash_len,
360            bitwise_chiplet_len: bitwise_len,
361            memory_chiplet_len: memory_len,
362            ace_chiplet_len: ace_len,
363            kernel_rom_len: kernel_len,
364        }
365    }
366
367    /// Returns the length of the hash chiplet trace.
368    pub fn hash_chiplet_len(&self) -> usize {
369        self.hash_chiplet_len
370    }
371
372    /// Returns the length of the bitwise trace.
373    pub fn bitwise_chiplet_len(&self) -> usize {
374        self.bitwise_chiplet_len
375    }
376
377    /// Returns the length of the memory trace.
378    pub fn memory_chiplet_len(&self) -> usize {
379        self.memory_chiplet_len
380    }
381
382    /// Returns the length of the ACE chiplet trace.
383    pub fn ace_chiplet_len(&self) -> usize {
384        self.ace_chiplet_len
385    }
386
387    /// Returns the length of the kernel ROM trace.
388    pub fn kernel_rom_len(&self) -> usize {
389        self.kernel_rom_len
390    }
391
392    /// Returns the length of the trace required to accommodate chiplet components and 1
393    /// mandatory padding row required for ensuring sufficient trace length for auxiliary connector
394    /// columns that rely on the memory chiplet.
395    pub fn trace_len(&self) -> usize {
396        self.hash_chiplet_len()
397            + self.bitwise_chiplet_len()
398            + self.memory_chiplet_len()
399            + self.ace_chiplet_len()
400            + self.kernel_rom_len()
401            + 1
402    }
403}
404
405// U32 HELPERS
406// ================================================================================================
407
408/// Splits an element into two 16 bit integer limbs. It assumes that the field element contains a
409/// valid 32-bit integer value.
410pub(crate) fn split_element_u32_into_u16(value: Felt) -> (Felt, Felt) {
411    let (hi, lo) = split_u32_into_u16(value.as_canonical_u64());
412    (Felt::new_unchecked(hi as u64), Felt::new_unchecked(lo as u64))
413}
414
415/// Splits a u64 integer assumed to contain a 32-bit value into two u16 integers.
416///
417/// # Errors
418/// Fails in debug mode if the provided value is not a 32-bit value.
419pub(crate) fn split_u32_into_u16(value: u64) -> (u16, u16) {
420    const U32MAX: u64 = u32::MAX as u64;
421    debug_assert!(value <= U32MAX, "not a 32-bit value");
422
423    let lo = value as u16;
424    let hi = (value >> 16) as u16;
425
426    (hi, lo)
427}
428
429// TEST HELPERS
430// ================================================================================================
431
432/// Builds a 17-op basic block payload that straddles a RESPAN batch boundary, plus the initial
433/// values its `Push` ops emit. Consumed by decoder / hasher tests that exercise multi-batch
434/// SPAN execution.
435#[cfg(test)]
436pub fn build_span_with_respan_ops() -> (Vec<Operation>, Vec<Felt>) {
437    let iv = [1, 3, 5, 7, 9, 11, 13, 15, 17].to_elements();
438    let ops = alloc::vec![
439        Operation::Push(iv[0]),
440        Operation::Push(iv[1]),
441        Operation::Push(iv[2]),
442        Operation::Push(iv[3]),
443        Operation::Push(iv[4]),
444        Operation::Push(iv[5]),
445        Operation::Push(iv[6]),
446        // next batch
447        Operation::Push(iv[7]),
448        Operation::Push(iv[8]),
449        Operation::Add,
450        // drops to make sure stack overflow is empty on exit
451        Operation::Drop,
452        Operation::Drop,
453        Operation::Drop,
454        Operation::Drop,
455        Operation::Drop,
456        Operation::Drop,
457        Operation::Drop,
458        Operation::Drop,
459    ];
460    (ops, iv)
461}