sp1_core_machine/program/
mod.rs

1use core::{
2    borrow::{Borrow, BorrowMut},
3    mem::size_of,
4};
5use std::collections::HashMap;
6
7use crate::{
8    air::ProgramAirBuilder,
9    utils::{next_power_of_two, pad_rows_fixed, zeroed_f_vec},
10};
11use p3_air::{Air, BaseAir, PairBuilder};
12use p3_field::PrimeField32;
13use p3_matrix::{dense::RowMajorMatrix, Matrix};
14use p3_maybe_rayon::prelude::{ParallelBridge, ParallelIterator};
15use sp1_core_executor::{ExecutionRecord, Program};
16use sp1_derive::AlignedBorrow;
17use sp1_stark::air::{MachineAir, SP1AirBuilder};
18
19use crate::cpu::columns::InstructionCols;
20
21/// The number of preprocessed program columns.
22pub const NUM_PROGRAM_PREPROCESSED_COLS: usize = size_of::<ProgramPreprocessedCols<u8>>();
23
24/// The number of columns for the program multiplicities.
25pub const NUM_PROGRAM_MULT_COLS: usize = size_of::<ProgramMultiplicityCols<u8>>();
26
27/// The column layout for the chip.
28#[derive(AlignedBorrow, Clone, Copy, Default)]
29#[repr(C)]
30pub struct ProgramPreprocessedCols<T> {
31    pub pc: T,
32    pub instruction: InstructionCols<T>,
33}
34
35/// The column layout for the chip.
36#[derive(AlignedBorrow, Clone, Copy, Default)]
37#[repr(C)]
38pub struct ProgramMultiplicityCols<T> {
39    pub multiplicity: T,
40}
41
42/// A chip that implements addition for the opcodes ADD and ADDI.
43#[derive(Default)]
44pub struct ProgramChip;
45
46impl ProgramChip {
47    pub const fn new() -> Self {
48        Self {}
49    }
50}
51
52impl<F: PrimeField32> MachineAir<F> for ProgramChip {
53    type Record = ExecutionRecord;
54
55    type Program = Program;
56
57    fn name(&self) -> String {
58        "Program".to_string()
59    }
60
61    fn preprocessed_width(&self) -> usize {
62        NUM_PROGRAM_PREPROCESSED_COLS
63    }
64
65    fn generate_preprocessed_trace(&self, program: &Self::Program) -> Option<RowMajorMatrix<F>> {
66        debug_assert!(
67            !program.instructions.is_empty() || program.preprocessed_shape.is_some(),
68            "empty program"
69        );
70        // Generate the trace rows for each event.
71        let nb_rows = program.instructions.len();
72        let size_log2 = program.fixed_log2_rows::<F, _>(self);
73        let padded_nb_rows = next_power_of_two(nb_rows, size_log2);
74        let mut values = zeroed_f_vec(padded_nb_rows * NUM_PROGRAM_PREPROCESSED_COLS);
75        let chunk_size = std::cmp::max((nb_rows + 1) / num_cpus::get(), 1);
76
77        values
78            .chunks_mut(chunk_size * NUM_PROGRAM_PREPROCESSED_COLS)
79            .enumerate()
80            .par_bridge()
81            .for_each(|(i, rows)| {
82                rows.chunks_mut(NUM_PROGRAM_PREPROCESSED_COLS).enumerate().for_each(|(j, row)| {
83                    let mut idx = i * chunk_size + j;
84                    if idx >= nb_rows {
85                        idx = 0;
86                    }
87                    let cols: &mut ProgramPreprocessedCols<F> = row.borrow_mut();
88                    let instruction = &program.instructions[idx];
89                    let pc = program.pc_base + (idx as u32 * 4);
90                    cols.pc = F::from_canonical_u32(pc);
91                    cols.instruction.populate(instruction);
92                });
93            });
94
95        // Convert the trace to a row major matrix.
96        Some(RowMajorMatrix::new(values, NUM_PROGRAM_PREPROCESSED_COLS))
97    }
98
99    fn generate_dependencies(&self, _input: &ExecutionRecord, _output: &mut ExecutionRecord) {
100        // Do nothing since this chip has no dependencies.
101    }
102
103    fn generate_trace(
104        &self,
105        input: &ExecutionRecord,
106        _output: &mut ExecutionRecord,
107    ) -> RowMajorMatrix<F> {
108        // Generate the trace rows for each event.
109
110        // Collect the number of times each instruction is called from the cpu events.
111        // Store it as a map of PC -> count.
112        let mut instruction_counts = HashMap::new();
113        input.cpu_events.iter().for_each(|event| {
114            let pc = event.pc;
115            instruction_counts.entry(pc).and_modify(|count| *count += 1).or_insert(1);
116        });
117
118        let mut rows = input
119            .program
120            .instructions
121            .clone()
122            .into_iter()
123            .enumerate()
124            .map(|(i, _)| {
125                let pc = input.program.pc_base + (i as u32 * 4);
126                let mut row = [F::zero(); NUM_PROGRAM_MULT_COLS];
127                let cols: &mut ProgramMultiplicityCols<F> = row.as_mut_slice().borrow_mut();
128                cols.multiplicity =
129                    F::from_canonical_usize(*instruction_counts.get(&pc).unwrap_or(&0));
130                row
131            })
132            .collect::<Vec<_>>();
133
134        // Pad the trace to a power of two depending on the proof shape in `input`.
135        pad_rows_fixed(
136            &mut rows,
137            || [F::zero(); NUM_PROGRAM_MULT_COLS],
138            input.fixed_log2_rows::<F, _>(self),
139        );
140
141        RowMajorMatrix::new(rows.into_iter().flatten().collect::<Vec<_>>(), NUM_PROGRAM_MULT_COLS)
142    }
143
144    fn included(&self, _: &Self::Record) -> bool {
145        true
146    }
147}
148
149impl<F> BaseAir<F> for ProgramChip {
150    fn width(&self) -> usize {
151        NUM_PROGRAM_MULT_COLS
152    }
153}
154
155impl<AB> Air<AB> for ProgramChip
156where
157    AB: SP1AirBuilder + PairBuilder,
158{
159    fn eval(&self, builder: &mut AB) {
160        let main = builder.main();
161        let preprocessed = builder.preprocessed();
162
163        let prep_local = preprocessed.row_slice(0);
164        let prep_local: &ProgramPreprocessedCols<AB::Var> = (*prep_local).borrow();
165        let mult_local = main.row_slice(0);
166        let mult_local: &ProgramMultiplicityCols<AB::Var> = (*mult_local).borrow();
167
168        // Constrain the interaction with CPU table
169        builder.receive_program(prep_local.pc, prep_local.instruction, mult_local.multiplicity);
170    }
171}
172
173#[cfg(test)]
174mod tests {
175    #![allow(clippy::print_stdout)]
176
177    use std::sync::Arc;
178
179    use hashbrown::HashMap;
180    use p3_baby_bear::BabyBear;
181
182    use p3_matrix::dense::RowMajorMatrix;
183    use sp1_core_executor::{ExecutionRecord, Instruction, Opcode, Program};
184    use sp1_stark::air::MachineAir;
185
186    use crate::program::ProgramChip;
187
188    #[test]
189    fn generate_trace() {
190        // main:
191        //     addi x29, x0, 5
192        //     addi x30, x0, 37
193        //     add x31, x30, x29
194        let instructions = vec![
195            Instruction::new(Opcode::ADD, 29, 0, 5, false, true),
196            Instruction::new(Opcode::ADD, 30, 0, 37, false, true),
197            Instruction::new(Opcode::ADD, 31, 30, 29, false, false),
198        ];
199        let shard = ExecutionRecord {
200            program: Arc::new(Program {
201                instructions,
202                pc_start: 0,
203                pc_base: 0,
204                memory_image: HashMap::new(),
205                preprocessed_shape: None,
206            }),
207            ..Default::default()
208        };
209        let chip = ProgramChip::new();
210        let trace: RowMajorMatrix<BabyBear> =
211            chip.generate_trace(&shard, &mut ExecutionRecord::default());
212        println!("{:?}", trace.values)
213    }
214}