use core::{
borrow::{Borrow, BorrowMut},
mem::{size_of, MaybeUninit},
};
use hashbrown::HashMap;
use slop_air::{Air, BaseAir};
use slop_algebra::{AbstractField, PrimeField, PrimeField32};
use slop_matrix::Matrix;
use slop_maybe_rayon::prelude::{ParallelBridge, ParallelIterator, ParallelSlice};
use sp1_core_executor::{
events::{AluEvent, ByteLookupEvent, ByteRecord},
ExecutionRecord, Opcode, Program, CLK_INC, PC_INC,
};
use sp1_derive::AlignedBorrow;
use sp1_hypercube::{air::MachineAir, Word};
use crate::{
adapter::{
register::r_type::{RTypeReader, RTypeReaderInput},
state::{CPUState, CPUStateInput},
},
air::{SP1CoreAirBuilder, SP1Operation},
operations::{MulOperation, MulOperationInput},
utils::next_multiple_of_32,
};
/// The number of columns in a [`MulCols`] row, taken as the byte size of `MulCols<u8>`.
pub const NUM_MUL_COLS: usize = size_of::<MulCols<u8>>();
/// Chip for the multiplication opcodes handled in this file: MUL, MULH, MULHU, MULHSU and MULW.
#[derive(Default)]
pub struct MulChip;
/// The column layout of one row of the Mul chip trace.
///
/// The struct is `#[repr(C)]` and rows are reinterpreted as `MulCols` via `borrow`/`borrow_mut`,
/// so the field order below is the column order.
#[derive(AlignedBorrow, Default, Debug, Clone, Copy)]
#[repr(C)]
pub struct MulCols<T> {
/// CPU state columns; populated from the event's `clk` and `pc`.
pub state: CPUState<T>,
/// R-type register reader columns; populated from the record paired with the ALU event.
pub adapter: RTypeReader<T>,
/// The event's `a` value as a word.
pub a: Word<T>,
/// Columns of the multiplication operation itself.
pub mul_operation: MulOperation<T>,
/// Selector: 1 when the row's opcode is `MUL`.
pub is_mul: T,
/// Selector: 1 when the row's opcode is `MULH`.
pub is_mulh: T,
/// Selector: 1 when the row's opcode is `MULHU`.
pub is_mulhu: T,
/// Selector: 1 when the row's opcode is `MULHSU`.
pub is_mulhsu: T,
/// Selector: 1 when the row's opcode is `MULW`.
pub is_mulw: T,
}
impl<F: PrimeField32> MachineAir<F> for MulChip {
    type Record = ExecutionRecord;
    type Program = Program;

    /// Returns the chip's name, `"Mul"`.
    fn name(&self) -> &'static str {
        "Mul"
    }

    /// Returns the padded trace height for `input`.
    ///
    /// The height is `next_multiple_of_32` applied to the number of mul events and the record's
    /// `fixed_log2_rows` for this chip (presumably rounding up to a multiple of 32 unless a fixed
    /// height is configured — see `next_multiple_of_32` for the exact rule).
    fn num_rows(&self, input: &Self::Record) -> Option<usize> {
        let nb_rows =
            next_multiple_of_32(input.mul_events.len(), input.fixed_log2_rows::<F, _>(self));
        Some(nb_rows)
    }

    /// Writes the Mul trace for `input` into `buffer`, one row of `NUM_MUL_COLS` values per event,
    /// followed by all-zero padding rows up to the height returned by `num_rows`.
    ///
    /// Byte lookups emitted while populating rows are discarded here; they are collected
    /// separately by `generate_dependencies`.
    ///
    /// # Panics
    ///
    /// Panics if `buffer` holds fewer than `num_rows(input) * NUM_MUL_COLS` elements, or if the
    /// padded height is smaller than the number of events.
    fn generate_trace_into(
        &self,
        input: &ExecutionRecord,
        _output: &mut ExecutionRecord,
        buffer: &mut [MaybeUninit<F>],
    ) {
        let nb_rows = input.mul_events.len();
        let padded_nb_rows = <MulChip as MachineAir<F>>::num_rows(self, input).unwrap();
        let chunk_size = std::cmp::max((nb_rows + 1) / num_cpus::get(), 1);

        // Split the buffer into the region holding real rows and the padding region. Both splits
        // are bounds-checked, so an undersized buffer panics here instead of being written past
        // its end by the raw-pointer operations below.
        let padding_size = (padded_nb_rows - nb_rows) * NUM_MUL_COLS;
        let (real, rest) = buffer.split_at_mut(nb_rows * NUM_MUL_COLS);
        let padding = &mut rest[..padding_size];

        if !padding.is_empty() {
            // SAFETY: `padding` is a live, exclusively borrowed slice, so the pointer is valid
            // for writes of `padding.len()` elements. Any byte pattern is valid for
            // `MaybeUninit<F>`.
            unsafe { core::ptr::write_bytes(padding.as_mut_ptr(), 0, padding.len()) };
        }

        // SAFETY: `MaybeUninit<F>` has the same size and alignment as `F`, and `real` is an
        // exclusively borrowed slice of exactly `nb_rows * NUM_MUL_COLS` elements, so the pointer
        // and length describe memory we own for the lifetime of `values`.
        // NOTE(review): the rows are viewed as `F` before being written; this relies on the
        // populate calls below (or the caller) initializing every column — confirm.
        let values =
            unsafe { core::slice::from_raw_parts_mut(real.as_mut_ptr() as *mut F, real.len()) };

        values.chunks_mut(chunk_size * NUM_MUL_COLS).enumerate().par_bridge().for_each(
            |(i, rows)| {
                // Scratch sink for byte lookups; reused across the rows of this chunk and
                // emptied before each row, since its contents are not needed here.
                let mut byte_lookup_events: Vec<ByteLookupEvent> = Vec::new();
                rows.chunks_mut(NUM_MUL_COLS).enumerate().for_each(|(j, row)| {
                    // `values` contains exactly `nb_rows` rows, so `idx` is always a valid
                    // event index.
                    let idx = i * chunk_size + j;
                    let cols: &mut MulCols<F> = row.borrow_mut();
                    let event = &input.mul_events[idx];

                    byte_lookup_events.clear();
                    cols.adapter.populate(&mut byte_lookup_events, event.1);
                    self.event_to_row(&event.0, cols, &mut byte_lookup_events);
                    cols.state.populate(&mut byte_lookup_events, event.0.clk, event.0.pc);
                });
            },
        );
    }

    /// Collects the byte lookups produced by populating a row for every mul event and adds them
    /// to `output`.
    ///
    /// Each event is replayed into a throwaway zeroed row; only the lookups recorded in the
    /// per-chunk map are kept.
    fn generate_dependencies(&self, input: &Self::Record, output: &mut Self::Record) {
        let chunk_size = std::cmp::max(input.mul_events.len() / num_cpus::get(), 1);

        let blu_batches = input
            .mul_events
            .par_chunks(chunk_size)
            .map(|events| {
                let mut blu: HashMap<ByteLookupEvent, usize> = HashMap::new();
                events.iter().for_each(|event| {
                    let mut row = [F::zero(); NUM_MUL_COLS];
                    let cols: &mut MulCols<F> = row.as_mut_slice().borrow_mut();
                    cols.adapter.populate(&mut blu, event.1);
                    self.event_to_row(&event.0, cols, &mut blu);
                    cols.state.populate(&mut blu, event.0.clk, event.0.pc);
                });
                blu
            })
            .collect::<Vec<_>>();

        output.add_byte_lookup_events_from_maps(blu_batches.iter().collect::<Vec<_>>());
    }

    /// Reports whether this chip is part of `shard`: decided by the shard's shape when one is
    /// set, otherwise by whether the shard has any mul events.
    fn included(&self, shard: &Self::Record) -> bool {
        if let Some(shape) = shard.shape.as_ref() {
            shape.included::<F, _>(self)
        } else {
            !shard.mul_events.is_empty()
        }
    }
}
impl MulChip {
fn event_to_row<F: PrimeField>(
&self,
event: &AluEvent,
cols: &mut MulCols<F>,
blu: &mut impl ByteRecord,
) {
cols.mul_operation.populate(
blu,
event.b,
event.c,
event.opcode == Opcode::MULH,
event.opcode == Opcode::MULHSU,
event.opcode == Opcode::MULW,
);
cols.is_mul = F::from_bool(event.opcode == Opcode::MUL);
cols.is_mulh = F::from_bool(event.opcode == Opcode::MULH);
cols.is_mulhu = F::from_bool(event.opcode == Opcode::MULHU);
cols.is_mulhsu = F::from_bool(event.opcode == Opcode::MULHSU);
cols.is_mulw = F::from_bool(event.opcode == Opcode::MULW);
cols.a = Word::from(event.a);
}
}
impl<F> BaseAir<F> for MulChip {
/// Returns the trace width of the chip, `NUM_MUL_COLS`.
fn width(&self) -> usize {
NUM_MUL_COLS
}
}
impl<AB> Air<AB> for MulChip
where
AB: SP1CoreAirBuilder,
{
/// Emits the constraints for one row of the Mul chip.
///
/// The row is interpreted as `MulCols`. The method evaluates the multiplication operation,
/// constrains the opcode selectors, derives the opcode / instruction-field expressions from
/// those selectors, and evaluates the CPU state and R-type reader operations.
fn eval(&self, builder: &mut AB) {
let main = builder.main();
let local = main.row_slice(0);
let local: &MulCols<AB::Var> = (*local).borrow();
// A row is "real" when one of the opcode selectors is set. Each selector and this sum are
// asserted boolean below, so at most one selector can be set per row.
let is_real =
local.is_mul + local.is_mulh + local.is_mulhu + local.is_mulhsu + local.is_mulw;
// Evaluate the multiplication operation over `a`, the adapter's `b` and `c`, and the
// selectors.
// NOTE(review): the selector arguments are passed as mul, mulh, mulw, mulhu, mulhsu, which
// differs from the field order in `MulCols` — confirm against `MulOperationInput::new`.
<MulOperation<AB::F> as SP1Operation<AB>>::eval(
builder,
MulOperationInput::new(
local.a.map(|x| x.into()),
local.adapter.b().map(|x| x.into()),
local.adapter.c().map(|x| x.into()),
local.mul_operation,
is_real.clone(),
local.is_mul.into(),
local.is_mulh.into(),
local.is_mulw.into(),
local.is_mulhu.into(),
local.is_mulhsu.into(),
),
);
// Constrain the selectors to be boolean and build the opcode expression as the
// selector-weighted sum of the five opcode values.
let opcode = {
builder.assert_bool(local.is_mul);
builder.assert_bool(local.is_mulh);
builder.assert_bool(local.is_mulhu);
builder.assert_bool(local.is_mulw);
builder.assert_bool(local.is_mulhsu);
builder.assert_bool(is_real.clone());
let mul: AB::Expr = AB::F::from_canonical_u32(Opcode::MUL as u32).into();
let mulh: AB::Expr = AB::F::from_canonical_u32(Opcode::MULH as u32).into();
let mulhu: AB::Expr = AB::F::from_canonical_u32(Opcode::MULHU as u32).into();
let mulhsu: AB::Expr = AB::F::from_canonical_u32(Opcode::MULHSU as u32).into();
let mulw: AB::Expr = AB::F::from_canonical_u32(Opcode::MULW as u32).into();
local.is_mul * mul
+ local.is_mulh * mulh
+ local.is_mulhu * mulhu
+ local.is_mulhsu * mulhsu
+ local.is_mulw * mulw
};
// Selector-weighted `funct3` of the active opcode. The `unwrap`s panic if any of these
// opcodes has no `funct3`.
let funct3 = local.is_mul * AB::Expr::from_canonical_u8(Opcode::MUL.funct3().unwrap())
+ local.is_mulh * AB::Expr::from_canonical_u8(Opcode::MULH.funct3().unwrap())
+ local.is_mulhu * AB::Expr::from_canonical_u8(Opcode::MULHU.funct3().unwrap())
+ local.is_mulhsu * AB::Expr::from_canonical_u8(Opcode::MULHSU.funct3().unwrap())
+ local.is_mulw * AB::Expr::from_canonical_u8(Opcode::MULW.funct3().unwrap());
// Selector-weighted `funct7` of the active opcode. The `unwrap`s panic if any of these
// opcodes has no `funct7`.
let funct7 = local.is_mul * AB::Expr::from_canonical_u8(Opcode::MUL.funct7().unwrap())
+ local.is_mulh * AB::Expr::from_canonical_u8(Opcode::MULH.funct7().unwrap())
+ local.is_mulhu * AB::Expr::from_canonical_u8(Opcode::MULHU.funct7().unwrap())
+ local.is_mulhsu * AB::Expr::from_canonical_u8(Opcode::MULHSU.funct7().unwrap())
+ local.is_mulw * AB::Expr::from_canonical_u8(Opcode::MULW.funct7().unwrap());
// Selector-weighted base opcode, using the first component of `base_opcode()`.
let mul_base = Opcode::MUL.base_opcode().0;
let mulh_base = Opcode::MULH.base_opcode().0;
let mulhu_base = Opcode::MULHU.base_opcode().0;
let mulhsu_base = Opcode::MULHSU.base_opcode().0;
let mulw_base = Opcode::MULW.base_opcode().0;
let mul_base_expr = AB::Expr::from_canonical_u32(mul_base);
let mulh_base_expr = AB::Expr::from_canonical_u32(mulh_base);
let mulhu_base_expr = AB::Expr::from_canonical_u32(mulhu_base);
let mulhsu_base_expr = AB::Expr::from_canonical_u32(mulhsu_base);
let mulw_base_expr = AB::Expr::from_canonical_u32(mulw_base);
let calculated_base_opcode = local.is_mul * mul_base_expr
+ local.is_mulh * mulh_base_expr
+ local.is_mulhu * mulhu_base_expr
+ local.is_mulhsu * mulhsu_base_expr
+ local.is_mulw * mulw_base_expr;
// Selector-weighted instruction type, using the first component of `instruction_type()`.
let mul_instr_type = Opcode::MUL.instruction_type().0 as u32;
let mulh_instr_type = Opcode::MULH.instruction_type().0 as u32;
let mulhu_instr_type = Opcode::MULHU.instruction_type().0 as u32;
let mulhsu_instr_type = Opcode::MULHSU.instruction_type().0 as u32;
let mulw_instr_type = Opcode::MULW.instruction_type().0 as u32;
let calculated_instr_type = local.is_mul * AB::Expr::from_canonical_u32(mul_instr_type)
+ local.is_mulh * AB::Expr::from_canonical_u32(mulh_instr_type)
+ local.is_mulhu * AB::Expr::from_canonical_u32(mulhu_instr_type)
+ local.is_mulhsu * AB::Expr::from_canonical_u32(mulhsu_instr_type)
+ local.is_mulw * AB::Expr::from_canonical_u32(mulw_instr_type);
// Evaluate the CPU state operation. The second argument is the pc with `PC_INC` added to
// limb 0 and limbs 1 and 2 passed through; the third is `CLK_INC`.
<CPUState<AB::F> as SP1Operation<AB>>::eval(
builder,
CPUStateInput::new(
local.state,
[
local.state.pc[0] + AB::F::from_canonical_u32(PC_INC),
local.state.pc[1].into(),
local.state.pc[2].into(),
],
AB::Expr::from_canonical_u32(CLK_INC),
is_real.clone(),
),
);
// Evaluate the R-type register reader with the clock, pc, opcode, the four derived
// instruction fields, and `a` as the value passed alongside the adapter columns.
let a_expr = local.a.map(|x| x.into());
let alu_reader_input = RTypeReaderInput::<AB, AB::Expr>::new(
local.state.clk_high::<AB>(),
local.state.clk_low::<AB>(),
local.state.pc,
opcode,
[calculated_instr_type, calculated_base_opcode, funct3, funct7],
a_expr,
local.adapter,
is_real.clone(),
);
<RTypeReader<AB::F> as SP1Operation<AB>>::eval(builder, alu_reader_input);
}
}