use core::{
borrow::{Borrow, BorrowMut},
mem::{size_of, MaybeUninit},
};
use hashbrown::HashMap;
use itertools::Itertools;
use slop_air::{Air, AirBuilder, BaseAir};
use slop_algebra::{AbstractField, Field, PrimeField, PrimeField32};
use slop_matrix::Matrix;
use slop_maybe_rayon::prelude::{ParallelBridge, ParallelIterator, ParallelSlice};
use sp1_core_executor::{
events::{AluEvent, ByteLookupEvent, ByteRecord},
ALUTypeRecord, ByteOpcode, ExecutionRecord, Opcode, Program, CLK_INC, PC_INC,
};
use sp1_derive::AlignedBorrow;
use sp1_hypercube::{air::MachineAir, Word};
use sp1_primitives::consts::{u32_to_u16_limbs, u64_to_u16_limbs, WORD_SIZE};
use crate::{
adapter::{
register::alu_type::{ALUTypeReader, ALUTypeReaderInput},
state::{CPUState, CPUStateInput},
},
air::{SP1CoreAirBuilder, SP1Operation},
operations::{U16MSBOperation, U16MSBOperationInput},
utils::next_multiple_of_32,
};
/// Number of field elements in one trace row of the shift-left chip (one per column).
pub const NUM_SHIFT_LEFT_COLS: usize = size_of::<ShiftLeftCols<u8>>();
/// Number of bits in a byte.
pub const BYTE_SIZE: usize = 8;
/// Chip proving correctness of the shift-left instructions (SLL and SLLW).
#[derive(Default)]
pub struct ShiftLeft;
/// Column layout for one row of the shift-left chip.
///
/// The shift is witnessed in two stages: an intra-limb bit shift by `c & 0xF`
/// (via `lower_limb`/`higher_limb`/`limb_result`) and a limb-level shift by
/// whole 16-bit limbs (selected by the one-hot `shift_u16`).
#[derive(AlignedBorrow, Default, Debug, Clone, Copy)]
#[repr(C)]
pub struct ShiftLeftCols<T> {
    /// CPU state (clock and program counter) for this row.
    pub state: CPUState<T>,
    /// ALU-type operand reader for the instruction's registers/immediate.
    pub adapter: ALUTypeReader<T>,
    /// The result operand `a`.
    pub a: Word<T>,
    /// The low 6 bits of the shift amount `c`, little-endian (bit i at index i).
    pub c_bits: [T; 6],
    /// `2^(c & 3)`.
    pub v_01: T,
    /// `2^(c & 7)`.
    pub v_012: T,
    /// `2^(c & 15)`, i.e. `2^bit_shift` where `bit_shift = c & 0xF`.
    pub v_0123: T,
    /// One-hot selector of the limb-level shift amount (0..=3 16-bit limbs).
    pub shift_u16: [T; 4],
    /// Per limb of `b`: its low `16 - bit_shift` bits (the part that stays in place).
    pub lower_limb: Word<T>,
    /// Per limb of `b`: its high `bit_shift` bits (the part carried into the next limb).
    pub higher_limb: Word<T>,
    /// Result of the intra-limb bit shift, before the limb-level shift is applied.
    pub limb_result: Word<T>,
    /// MSB of the 32-bit SLLW result; drives sign extension of the upper limbs of `a`.
    pub sllw_msb: U16MSBOperation<T>,
    /// Flag: opcode is SLL.
    pub is_sll: T,
    /// Flag: opcode is SLLW.
    pub is_sllw: T,
    /// Flag: opcode is SLLW with an immediate `c` operand.
    pub is_sllw_imm: T,
}
impl<F: PrimeField32> MachineAir<F> for ShiftLeft {
    type Record = ExecutionRecord;
    type Program = Program;

    fn name(&self) -> &'static str {
        "ShiftLeft"
    }

    /// Padded trace height: the shift-left event count rounded up to a multiple of 32
    /// (or to the record's fixed log2 height for this chip, if one is set).
    fn num_rows(&self, input: &Self::Record) -> Option<usize> {
        let nb_rows =
            next_multiple_of_32(input.shift_left_events.len(), input.fixed_log2_rows::<F, _>(self));
        Some(nb_rows)
    }

    /// Fills `buffer` with one trace row per shift-left event, writing directly into
    /// caller-provided (possibly uninitialized) memory, in parallel chunks.
    fn generate_trace_into(
        &self,
        input: &ExecutionRecord,
        _output: &mut ExecutionRecord,
        buffer: &mut [MaybeUninit<F>],
    ) {
        let padded_nb_rows = <ShiftLeft as MachineAir<F>>::num_rows(self, input).unwrap();
        let nb_rows = input.shift_left_events.len();
        // Roughly one chunk of rows per available CPU for the parallel fill below.
        let chunk_size = std::cmp::max((padded_nb_rows + 1) / num_cpus::get(), 1);
        unsafe {
            // Zero the padding region so those elements are initialized before the
            // `from_raw_parts_mut` cast below. Real rows are fully written per-field
            // by `populate`/`event_to_row` in the loop.
            let padding_start = nb_rows * NUM_SHIFT_LEFT_COLS;
            let padding_size = (padded_nb_rows - nb_rows) * NUM_SHIFT_LEFT_COLS;
            if padding_size > 0 {
                core::ptr::write_bytes(buffer[padding_start..].as_mut_ptr(), 0, padding_size);
            }
        }
        let buffer_ptr = buffer.as_mut_ptr() as *mut F;
        // SAFETY: padding bytes were zeroed above and every real row is overwritten
        // below, so viewing the buffer as `F` values is sound by the time it is read.
        let values = unsafe {
            core::slice::from_raw_parts_mut(buffer_ptr, padded_nb_rows * NUM_SHIFT_LEFT_COLS)
        };
        // Padding-row template: the power-of-two accumulators must equal 1 (= 2^0) so
        // the `v_*` product constraints in `eval` hold on all-zero padding rows.
        let padded_row_template = {
            let mut row = [F::zero(); NUM_SHIFT_LEFT_COLS];
            let cols: &mut ShiftLeftCols<F> = row.as_mut_slice().borrow_mut();
            cols.v_01 = F::one();
            cols.v_012 = F::one();
            cols.v_0123 = F::one();
            row
        };
        values.chunks_mut(chunk_size * NUM_SHIFT_LEFT_COLS).enumerate().par_bridge().for_each(
            |(i, rows)| {
                rows.chunks_mut(NUM_SHIFT_LEFT_COLS).enumerate().for_each(|(j, row)| {
                    let idx = i * chunk_size + j;
                    let cols: &mut ShiftLeftCols<F> = row.borrow_mut();
                    if idx < nb_rows {
                        // Byte-lookup events are discarded here; `generate_dependencies`
                        // recomputes and records them.
                        let mut blu = Vec::new();
                        let event = &input.shift_left_events[idx];
                        cols.adapter.populate(&mut blu, event.1);
                        self.event_to_row(&event.0, &event.1, cols, &mut blu);
                        cols.state.populate(&mut blu, event.0.clk, event.0.pc);
                    } else {
                        row.copy_from_slice(&padded_row_template);
                    }
                });
            },
        );
    }

    /// Re-runs row population into a scratch row purely to collect the byte-lookup
    /// events each row emits, then merges them into `output`.
    fn generate_dependencies(&self, input: &Self::Record, output: &mut Self::Record) {
        let chunk_size = std::cmp::max(input.shift_left_events.len() / num_cpus::get(), 1);
        let blu_batches = input
            .shift_left_events
            .par_chunks(chunk_size)
            .map(|events| {
                let mut blu: HashMap<ByteLookupEvent, usize> = HashMap::new();
                events.iter().for_each(|event| {
                    // Scratch row; only the side effect of filling `blu` matters.
                    let mut row = [F::zero(); NUM_SHIFT_LEFT_COLS];
                    let cols: &mut ShiftLeftCols<F> = row.as_mut_slice().borrow_mut();
                    cols.adapter.populate(&mut blu, event.1);
                    self.event_to_row(&event.0, &event.1, cols, &mut blu);
                    cols.state.populate(&mut blu, event.0.clk, event.0.pc);
                });
                blu
            })
            .collect::<Vec<_>>();
        output.add_byte_lookup_events_from_maps(blu_batches.iter().collect_vec());
    }

    /// Included when the shard's shape says so, or (absent a shape) whenever the
    /// shard contains at least one shift-left event.
    fn included(&self, shard: &Self::Record) -> bool {
        if let Some(shape) = shard.shape.as_ref() {
            shape.included::<F, _>(self)
        } else {
            !shard.shift_left_events.is_empty()
        }
    }
}
impl ShiftLeft {
    /// Populates one trace row from a shift-left ALU event, pushing the byte
    /// range-check lookups the row requires into `blu`.
    ///
    /// The shift is decomposed into an intra-limb bit shift by `c & 0xF`
    /// (witnessed by `lower_limb`/`higher_limb`/`limb_result`) followed by a
    /// limb-level shift by whole 16-bit limbs (selected by one-hot `shift_u16`).
    fn event_to_row<F: PrimeField>(
        &self,
        event: &AluEvent,
        record: &ALUTypeRecord,
        cols: &mut ShiftLeftCols<F>,
        blu: &mut impl ByteRecord,
    ) {
        // Only the low 16-bit limb of the shift operand is witnessed.
        let c = u64_to_u16_limbs(event.c)[0];
        if event.opcode == Opcode::SLLW {
            // SLLW shifts by c mod 32 and truncates to 32 bits; the MSB of that
            // 32-bit result (top bit of limb 1) drives sign extension of `a`.
            let sllw_val = ((event.b as i64) << (c & 0x1f)) as u32;
            let sllw_limbs = u32_to_u16_limbs(sllw_val);
            cols.sllw_msb.populate_msb(blu, sllw_limbs[1]);
        } else {
            cols.sllw_msb.msb = F::zero();
        }
        cols.a = Word::from(event.a);
        let is_sll = event.opcode == Opcode::SLL;
        cols.is_sll = F::from_bool(is_sll);
        cols.is_sllw = F::from_bool(event.opcode == Opcode::SLLW);
        cols.is_sllw_imm = F::from_bool(event.opcode == Opcode::SLLW && record.is_imm);
        // Witness the low 6 bits of c individually (little-endian).
        for i in 0..6 {
            cols.c_bits[i] = F::from_canonical_u16((c >> i) & 1);
        }
        // Range-check that the remaining high part of c fits in 10 bits, so the
        // 6 witnessed bits plus the high part fully account for the 16-bit limb.
        blu.add_bit_range_check(c >> 6, 10);
        // Powers of two built from successive prefixes of c's low bits:
        // v_01 = 2^(c & 3), v_012 = 2^(c & 7), v_0123 = 2^(c & 15).
        cols.v_01 = F::from_canonical_u16(1 << (c & 3));
        cols.v_012 = F::from_canonical_u16(1 << (c & 7));
        cols.v_0123 = F::from_canonical_u16(1 << (c & 15));
        // Limb-level shift amount: bits 4-5 of c. For SLLW (32-bit shift, c mod 32)
        // bit 5 does not apply, so it is masked off by `is_sll`.
        let shift_amount = ((c >> 4) & 1) + 2 * ((c >> 5) & 1) * (is_sll as u16);
        let mut shift = [0u16; 4];
        for i in 0..4 {
            if i == shift_amount as usize {
                shift[i] = 1;
            }
        }
        let b = u64_to_u16_limbs(event.b);
        let bit_shift = (c & 0xF) as u8;
        // Split each limb of b around the shift point:
        // limb = higher_limb * 2^(16 - bit_shift) + lower_limb,
        // with lower_limb < 2^(16 - bit_shift) and higher_limb < 2^bit_shift
        // enforced by the range checks.
        for i in 0..WORD_SIZE {
            let limb = b[i] as u32;
            let lower_limb = (limb & ((1 << (16 - bit_shift)) - 1)) as u16;
            let higher_limb = (limb >> (16 - bit_shift)) as u16;
            cols.lower_limb[i] = F::from_canonical_u16(lower_limb);
            cols.higher_limb[i] = F::from_canonical_u16(higher_limb);
            blu.add_bit_range_check(lower_limb, 16 - bit_shift);
            blu.add_bit_range_check(higher_limb, bit_shift);
        }
        // Intra-limb shift result: each limb keeps its shifted low part and receives
        // the bits carried out of the limb below it.
        for i in 0..WORD_SIZE {
            cols.limb_result[i] = cols.lower_limb[i] * F::from_canonical_u32(1u32 << bit_shift);
            if i != 0 {
                cols.limb_result[i] += cols.higher_limb[i - 1];
            }
        }
        cols.shift_u16 = shift.map(|x| F::from_canonical_u16(x));
    }
}
impl<F> BaseAir<F> for ShiftLeft {
    /// Width of the trace matrix: one column per field of `ShiftLeftCols`.
    fn width(&self) -> usize {
        NUM_SHIFT_LEFT_COLS
    }
}
impl<AB> Air<AB> for ShiftLeft
where
    AB: SP1CoreAirBuilder,
{
    /// Constrains one row of the shift-left chip, mirroring the witness layout
    /// produced by `event_to_row`.
    fn eval(&self, builder: &mut AB) {
        let main = builder.main();
        let local = main.row_slice(0);
        let local: &ShiftLeftCols<AB::Var> = (*local).borrow();

        // Opcode flags are boolean and mutually exclusive (their sum is boolean);
        // both zero marks a padding row.
        let is_real = local.is_sll + local.is_sllw;
        builder.assert_bool(is_real.clone());
        builder.assert_bool(local.is_sll);
        builder.assert_bool(local.is_sllw);

        // The six witnessed low bits of c are booleans.
        for i in 0..6 {
            builder.assert_bool(local.c_bits[i]);
        }
        // Recompose the low 6 bits of c; `bit_shift` captures the prefix value of the
        // low 4 bits (the intra-limb shift amount).
        let mut c_lower_bits = AB::Expr::zero();
        let mut bit_shift = AB::Expr::zero();
        for i in 0..6 {
            c_lower_bits = c_lower_bits + local.c_bits[i] * AB::F::from_canonical_u32(1 << i);
            if i == 3 {
                bit_shift = c_lower_bits.clone();
            }
        }
        // Range-check that (c_limb0 - low 6 bits) / 64 fits in 10 bits, proving the
        // bit decomposition of the low 16-bit limb of c is exact.
        let inverse_64 = AB::F::from_canonical_u32(64).inverse();
        builder.send_byte(
            AB::F::from_canonical_u32(ByteOpcode::Range as u32),
            (local.adapter.c()[0] - c_lower_bits) * inverse_64,
            AB::Expr::from_canonical_u32(10),
            AB::Expr::zero(),
            is_real.clone(),
        );
        // `shift_u16` is a one-hot selector of the limb-level shift amount
        // c_bits[4] + 2 * c_bits[5] (bit 5 masked off for SLLW via `is_sll`).
        for i in 0..WORD_SIZE {
            builder.when(local.shift_u16[i]).assert_eq(
                local.c_bits[4] + local.c_bits[5] * AB::F::from_canonical_u32(2) * local.is_sll,
                AB::Expr::from_canonical_u32(i as u32),
            );
            builder.assert_bool(local.shift_u16[i]);
        }
        // On real rows exactly one selector is set.
        builder.when(is_real.clone()).assert_eq(
            local.shift_u16[0] + local.shift_u16[1] + local.shift_u16[2] + local.shift_u16[3],
            AB::Expr::from_canonical_u32(1),
        );
        // Power-of-two accumulators: (c0 + 1) = 2^c0, (3*c1 + 1) = 4^c1,
        // (15*c2 + 1) = 16^c2, (255*c3 + 1) = 256^c3, so
        // v_01 = 2^(c & 3), v_012 = 2^(c & 7), v_0123 = 2^(c & 15) = 2^bit_shift.
        let one = AB::F::from_canonical_u32(1);
        let three = AB::F::from_canonical_u32(3);
        let fifteen = AB::F::from_canonical_u32(15);
        let two_fifty_five = AB::F::from_canonical_u32(255);
        builder.assert_eq(local.v_01, (local.c_bits[0] + one) * (local.c_bits[1] * three + one));
        builder.assert_eq(local.v_012, local.v_01 * (local.c_bits[2] * fifteen + one));
        builder.assert_eq(local.v_0123, local.v_012 * (local.c_bits[3] * two_fifty_five + one));
        // Intra-limb split of each limb of b. The range checks force
        // lower_limb < 2^(16 - bit_shift) and higher_limb < 2^bit_shift, and the
        // equality limb * 2^bs = higher * 2^16 + lower * 2^bs pins the split exactly.
        for i in 0..WORD_SIZE {
            let limb = local.adapter.b()[i];
            builder.send_byte(
                AB::F::from_canonical_u32(ByteOpcode::Range as u32),
                local.lower_limb[i],
                AB::Expr::from_canonical_u32(16) - bit_shift.clone(),
                AB::Expr::zero(),
                is_real.clone(),
            );
            builder.send_byte(
                AB::F::from_canonical_u32(ByteOpcode::Range as u32),
                local.higher_limb[i],
                bit_shift.clone(),
                AB::Expr::zero(),
                is_real.clone(),
            );
            builder.assert_eq(
                limb * local.v_0123,
                local.higher_limb[i] * AB::Expr::from_canonical_u32(1 << 16)
                    + local.lower_limb[i] * local.v_0123,
            );
        }
        // Intra-limb shift result: shifted low part plus the carry from the limb below.
        for i in 0..WORD_SIZE {
            let mut limb_result = local.lower_limb[i] * local.v_0123;
            if i != 0 {
                limb_result = limb_result.clone() + local.higher_limb[i - 1];
            }
            builder.assert_eq(local.limb_result[i], limb_result);
        }
        // SLL: apply the limb-level shift over all 4 limbs — limbs below the shift
        // amount are zero, the rest are the shifted-down `limb_result` values.
        for i in 0..WORD_SIZE {
            for j in 0..WORD_SIZE {
                if j < i {
                    builder.when(local.is_sll).when(local.shift_u16[i]).assert_zero(local.a[j]);
                } else {
                    builder
                        .when(local.is_sll)
                        .when(local.shift_u16[i])
                        .assert_eq(local.a[j], local.limb_result[j - i]);
                }
            }
        }
        // SLLW: same placement, but only over the low 2 limbs (the 32-bit result).
        for i in 0..WORD_SIZE / 2 {
            for j in 0..WORD_SIZE / 2 {
                if j < i {
                    builder.when(local.is_sllw).when(local.shift_u16[i]).assert_zero(local.a[j]);
                } else {
                    builder
                        .when(local.is_sllw)
                        .when(local.shift_u16[i])
                        .assert_eq(local.a[j], local.limb_result[j - i]);
                }
            }
        }
        // SLLW: sign-extend the upper 2 limbs from the MSB of the 32-bit result
        // (each upper limb is 0xFFFF when the MSB is set, 0 otherwise).
        let u16_max = AB::F::from_canonical_u16(u16::MAX);
        for i in WORD_SIZE / 2..WORD_SIZE {
            builder.when(local.is_sllw).assert_eq(local.sllw_msb.msb * u16_max, local.a[i]);
        }
        // Constrain `sllw_msb` to be the top bit of limb 1 of `a` (active only for SLLW).
        U16MSBOperation::<AB::F>::eval(
            builder,
            U16MSBOperationInput::new(local.a[1].into(), local.sllw_msb, local.is_sllw.into()),
        );
        // Reconstruct the instruction encoding fields from the opcode flags.
        let opcode = local.is_sll * AB::F::from_canonical_u32(Opcode::SLL as u32)
            + local.is_sllw * AB::F::from_canonical_u32(Opcode::SLLW as u32);
        let funct3 = local.is_sll * AB::Expr::from_canonical_u8(Opcode::SLL.funct3().unwrap())
            + local.is_sllw * AB::Expr::from_canonical_u8(Opcode::SLLW.funct3().unwrap());
        let funct7 = local.is_sll * AB::Expr::from_canonical_u8(Opcode::SLL.funct7().unwrap_or(0))
            + local.is_sllw * AB::Expr::from_canonical_u8(Opcode::SLLW.funct7().unwrap_or(0));
        // Base opcode: the register form minus a fixed offset when the c operand is an
        // immediate. The compile-time asserts check the offset is the same for SLL and
        // SLLW, so a single `imm_c`-scaled correction term suffices.
        let (sll_base, sll_imm) = Opcode::SLL.base_opcode();
        let sll_imm = sll_imm.expect("SLL immediate opcode not found");
        let (sllw_base, sllw_imm) = Opcode::SLLW.base_opcode();
        let sllw_imm = sllw_imm.expect("SLLW immediate opcode not found");
        let imm_base_difference = sll_base.checked_sub(sll_imm).unwrap();
        assert!(imm_base_difference == sllw_base.checked_sub(sllw_imm).unwrap());
        let sll_base_expr = AB::Expr::from_canonical_u32(sll_base);
        let sllw_base_expr = AB::Expr::from_canonical_u32(sllw_base);
        let calculated_base_opcode = local.is_sll * sll_base_expr + local.is_sllw * sllw_base_expr
            - AB::Expr::from_canonical_u32(imm_base_difference) * local.adapter.imm_c;
        // Instruction type: same register-vs-immediate correction, plus an extra
        // adjustment for the SLLW immediate form (checked consistent at compile time).
        let sll_instr_type = Opcode::SLL.instruction_type().0 as u32;
        let sll_instr_type_imm =
            Opcode::SLL.instruction_type().1.expect("SLL immediate instruction type not found")
                as u32;
        let sllw_instr_type = Opcode::SLLW.instruction_type().0 as u32;
        let sllw_instr_type_imm =
            Opcode::SLLW.instruction_type().1.expect("SLLW immediate instruction type not found")
                as u32;
        let instr_type_difference = sll_instr_type.checked_sub(sll_instr_type_imm).unwrap();
        let w_instr_imm_adjustment = sll_instr_type_imm.checked_sub(sllw_instr_type_imm).unwrap();
        assert_eq!(
            sllw_instr_type.checked_sub(sllw_instr_type_imm).unwrap(),
            instr_type_difference + w_instr_imm_adjustment,
        );
        // is_sllw_imm is the product flag used for the SLLW-immediate adjustment.
        builder.assert_eq(local.is_sllw_imm, local.is_sllw * local.adapter.imm_c);
        let calculated_instr_type = local.is_sll * AB::Expr::from_canonical_u32(sll_instr_type)
            + local.is_sllw * AB::Expr::from_canonical_u32(sllw_instr_type)
            - (AB::Expr::from_canonical_u32(instr_type_difference) * local.adapter.imm_c
                + AB::Expr::from_canonical_u32(w_instr_imm_adjustment) * local.is_sllw_imm);
        // CPU state transition: clk advances by CLK_INC and pc by PC_INC in its low
        // limb. NOTE(review): this expression alone carries no limb carry-out;
        // presumably `CPUState::eval` handles/forbids overflow of pc[0] — confirm there.
        <CPUState<AB::F> as SP1Operation<AB>>::eval(
            builder,
            CPUStateInput::new(
                local.state,
                [
                    local.state.pc[0] + AB::F::from_canonical_u32(PC_INC),
                    local.state.pc[1].into(),
                    local.state.pc[2].into(),
                ],
                AB::Expr::from_canonical_u32(CLK_INC),
                is_real.clone(),
            ),
        );
        // Hand the reconstructed encoding and the result word to the ALU-type reader,
        // which constrains the operand reads/writes against the rest of the machine.
        let alu_reader_input = ALUTypeReaderInput::<AB, AB::Expr>::new(
            local.state.clk_high::<AB>(),
            local.state.clk_low::<AB>(),
            local.state.pc,
            opcode,
            [calculated_instr_type, calculated_base_opcode, funct3, funct7],
            local.a.map(|x| x.into()),
            local.adapter,
            is_real.clone(),
        );
        ALUTypeReader::<AB::F>::eval(builder, alu_reader_input);
    }
}
}