use crate::{
adapter::{
register::i_type::{ITypeReader, ITypeReaderInput},
state::{CPUState, CPUStateInput},
},
air::{SP1CoreAirBuilder, SP1Operation},
memory::MemoryAccessCols,
operations::{AddressOperation, AddressOperationInput, U16MSBOperation, U16MSBOperationInput},
utils::next_multiple_of_32,
};
use hashbrown::HashMap;
use itertools::Itertools;
use rayon::iter::{ParallelBridge, ParallelIterator};
use slop_air::{Air, AirBuilder, BaseAir};
use slop_algebra::{AbstractField, PrimeField32};
use slop_matrix::Matrix;
use sp1_core_executor::{
events::{ByteLookupEvent, ByteRecord, MemInstrEvent, MemoryAccessPosition},
ExecutionRecord, Opcode, Program, CLK_INC, PC_INC,
};
use sp1_derive::AlignedBorrow;
use sp1_hypercube::{
air::{BaseAirBuilder, MachineAir},
Word,
};
use sp1_primitives::consts::u64_to_u16_limbs;
use std::{
borrow::{Borrow, BorrowMut},
mem::{size_of, MaybeUninit},
};
/// Chip handling the half-word load opcodes `Opcode::LH` and `Opcode::LHU`
/// (the two opcodes selected by `is_lh` / `is_lhu` in its `Air::eval`).
#[derive(Default)]
pub struct LoadHalfChip;
/// Width of one `LoadHalfColumns` row, computed as the byte size of the struct
/// instantiated with `T = u8`.
// NOTE(review): this equals the column count only if every nested column struct
// consists solely of `T` cells -- confirm against their definitions.
pub const NUM_LOAD_HALF_COLUMNS: usize = size_of::<LoadHalfColumns<u8>>();
/// Column layout of a single `LoadHalfChip` trace row.
///
/// The struct is `#[repr(C)]` and is borrowed directly from a flat row slice,
/// so the field order is part of the layout and must not be changed casually.
#[derive(AlignedBorrow, Default, Debug, Clone, Copy)]
#[repr(C)]
pub struct LoadHalfColumns<T> {
/// CPU state columns, populated from the event's `clk` and `pc`.
pub state: CPUState<T>,
/// I-type instruction reader columns (operands `b`, `c` and the `op_a_0` flag are read from it).
pub adapter: ITypeReader<T>,
/// Columns of the address computation over operands `b` and `c`.
pub address_operation: AddressOperation<T>,
/// Columns of the memory read; its `prev_value` limbs are the source of `selected_half`.
pub memory_access: MemoryAccessCols<T>,
/// `offset_bit[0]` is bit 1 and `offset_bit[1]` is bit 2 of the memory address;
/// together they index the 16-bit limb being loaded.
pub offset_bit: [T; 2],
/// The 16-bit limb of the accessed value selected by `offset_bit`.
pub selected_half: T,
/// MSB columns for `selected_half`; populated for `LH`, with `msb.msb` set to zero for `LHU`.
pub msb: U16MSBOperation<T>,
/// Set to one when the row's opcode is `LH`, zero otherwise.
pub is_lh: T,
/// Set to one when the row's opcode is not `LH` (i.e. `LHU` for this chip), zero otherwise.
pub is_lhu: T,
}
impl<F> BaseAir<F> for LoadHalfChip {
/// Returns the trace width, i.e. `NUM_LOAD_HALF_COLUMNS`.
fn width(&self) -> usize {
NUM_LOAD_HALF_COLUMNS
}
}
impl<F: PrimeField32> MachineAir<F> for LoadHalfChip {
    type Record = ExecutionRecord;
    type Program = Program;

    /// Name under which this chip is identified.
    fn name(&self) -> &'static str {
        "LoadHalf"
    }

    /// Trace height for `input`: the number of load-half events passed through
    /// `next_multiple_of_32` together with the record's fixed log2 height for this chip.
    fn num_rows(&self, input: &Self::Record) -> Option<usize> {
        let nb_rows = next_multiple_of_32(
            input.memory_load_half_events.len(),
            input.fixed_log2_rows::<F, _>(self),
        );
        Some(nb_rows)
    }

    /// Writes the row-major trace for `input` into `buffer`.
    ///
    /// One row is populated per entry of `input.memory_load_half_events`; the rows
    /// between the last event and the padded height are filled with zero bytes.
    /// Byte-lookup events produced while populating rows are accumulated per chunk
    /// and handed to `output`.
    ///
    /// # Panics
    ///
    /// Panics if `buffer` holds fewer than `num_rows * NUM_LOAD_HALF_COLUMNS`
    /// elements, or if the padded height is smaller than the number of events.
    fn generate_trace_into(
        &self,
        input: &ExecutionRecord,
        output: &mut ExecutionRecord,
        buffer: &mut [MaybeUninit<F>],
    ) {
        let num_event_rows = input.memory_load_half_events.len();
        let chunk_size = std::cmp::max(num_event_rows / num_cpus::get(), 1);
        // `num_rows` above always returns `Some`, so this cannot fail.
        let padded_nb_rows = <LoadHalfChip as MachineAir<F>>::num_rows(self, input).unwrap();
        let num_values = padded_nb_rows * NUM_LOAD_HALF_COLUMNS;

        // The raw-pointer code below relies on this bound; fail loudly instead of
        // writing out of bounds when a caller hands in a short buffer.
        assert!(
            buffer.len() >= num_values,
            "LoadHalf trace buffer too small: got {} elements, need {}",
            buffer.len(),
            num_values
        );

        // Bounds-checked view of the padding rows. The range indexing also panics
        // when the event rows would extend past the padded height, instead of
        // letting a subtraction wrap around in release builds.
        let padding = &mut buffer[num_event_rows * NUM_LOAD_HALF_COLUMNS..num_values];
        if !padding.is_empty() {
            // SAFETY: `padding` is an exclusively borrowed sub-slice of `buffer`, so its
            // pointer is valid for writes of `padding.len()` elements. Writing zero bytes
            // into `MaybeUninit<F>` is always allowed.
            // NOTE(review): downstream code treats these bytes as `F`, which assumes the
            // all-zero bit pattern is the zero field element -- confirm for the field used.
            unsafe { core::ptr::write_bytes(padding.as_mut_ptr(), 0, padding.len()) };
        }

        // SAFETY: the assertion above guarantees `buffer` spans at least `num_values`
        // elements, `MaybeUninit<F>` has the same layout as `F`, and `buffer` is
        // exclusively borrowed for the lifetime of `values`.
        // NOTE(review): the event rows are still uninitialized at this point; this relies
        // on the populate calls below writing every column of those rows -- confirm.
        let values =
            unsafe { core::slice::from_raw_parts_mut(buffer.as_mut_ptr() as *mut F, num_values) };

        let blu_events = values
            .chunks_mut(chunk_size * NUM_LOAD_HALF_COLUMNS)
            .enumerate()
            .par_bridge()
            .map(|(i, rows)| {
                let mut blu: HashMap<ByteLookupEvent, usize> = HashMap::new();
                rows.chunks_mut(NUM_LOAD_HALF_COLUMNS).enumerate().for_each(|(j, row)| {
                    let idx = i * chunk_size + j;
                    let cols: &mut LoadHalfColumns<F> = row.borrow_mut();
                    // Rows past the last event are padding and were zeroed above.
                    if idx < num_event_rows {
                        let event = &input.memory_load_half_events[idx];
                        self.event_to_row(&event.0, cols, &mut blu);
                        cols.state.populate(&mut blu, event.0.clk, event.0.pc);
                        cols.adapter.populate(&mut blu, event.1);
                    }
                });
                blu
            })
            .collect::<Vec<_>>();

        output.add_byte_lookup_events_from_maps(blu_events.iter().collect_vec());
    }

    /// Whether this chip takes part in proving `shard`: decided by the shard's shape
    /// when one is present, otherwise by whether the shard has any load-half events.
    fn included(&self, shard: &Self::Record) -> bool {
        if let Some(shape) = shard.shape.as_ref() {
            shape.included::<F, _>(self)
        } else {
            !shard.memory_load_half_events.is_empty()
        }
    }
}
impl LoadHalfChip {
    /// Fills the load-specific columns of one trace row from `event`.
    ///
    /// Populates the memory-access and address columns, records which 16-bit limb
    /// of the accessed value is loaded, and sets the opcode flags together with the
    /// MSB columns. Byte-lookup events generated along the way are added to `blu`.
    fn event_to_row<F: PrimeField32>(
        &self,
        event: &MemInstrEvent,
        cols: &mut LoadHalfColumns<F>,
        blu: &mut HashMap<ByteLookupEvent, usize>,
    ) {
        cols.memory_access.populate(event.mem_access, blu);
        let memory_addr = cols.address_operation.populate(blu, event.b, event.c);
        debug_assert!(memory_addr.is_multiple_of(2));

        // Address bits 1 and 2, read together, index one of the four 16-bit limbs.
        let half_index = ((memory_addr >> 1) & 0b11) as usize;
        let low_bit = (half_index & 1) as u16;
        let high_bit = (half_index >> 1) as u16;
        cols.offset_bit = [F::from_canonical_u16(low_bit), F::from_canonical_u16(high_bit)];

        let selected = u64_to_u16_limbs(event.mem_access.value())[half_index];
        cols.selected_half = F::from_canonical_u16(selected);

        // Only `LH` populates the MSB columns; every other opcode reaching this chip
        // is treated as `LHU` and gets its MSB column written as zero explicitly.
        let (lh_flag, lhu_flag) = if matches!(event.opcode, Opcode::LH) {
            cols.msb.populate_msb(blu, selected);
            (F::one(), F::zero())
        } else {
            cols.msb.msb = F::zero();
            (F::zero(), F::one())
        };
        cols.is_lh = lh_flag;
        cols.is_lhu = lhu_flag;
    }
}
impl<AB> Air<AB> for LoadHalfChip
where
AB: SP1CoreAirBuilder,
AB::Var: Sized,
{
/// Emits the constraints for one `LoadHalfColumns` row.
///
/// The row is "real" when exactly one of `is_lh` / `is_lhu` is set. The constraints
/// cover the address computation, the memory read, the selection of the loaded
/// 16-bit limb, its MSB, the CPU state transition and the I-type register access.
#[inline(never)]
fn eval(&self, builder: &mut AB) {
let main = builder.main();
let local = main.row_slice(0);
let local: &LoadHalfColumns<AB::Var> = (*local).borrow();
let clk_high = local.state.clk_high::<AB>();
let clk_low = local.state.clk_low::<AB>();
// Instruction descriptors, each built as a flag-weighted sum of the `LH` and `LHU`
// constants, so they evaluate to zero on rows where neither flag is set.
let opcode = AB::Expr::from_canonical_u32(Opcode::LH as u32) * local.is_lh
+ AB::Expr::from_canonical_u32(Opcode::LHU as u32) * local.is_lhu;
let funct3 = local.is_lh * AB::Expr::from_canonical_u8(Opcode::LH.funct3().unwrap())
+ local.is_lhu * AB::Expr::from_canonical_u8(Opcode::LHU.funct3().unwrap());
let funct7 = local.is_lh * AB::Expr::from_canonical_u8(Opcode::LH.funct7().unwrap_or(0))
+ local.is_lhu * AB::Expr::from_canonical_u8(Opcode::LHU.funct7().unwrap_or(0));
let base_opcode = local.is_lh * AB::Expr::from_canonical_u32(Opcode::LH.base_opcode().0)
+ local.is_lhu * AB::Expr::from_canonical_u32(Opcode::LHU.base_opcode().0);
let instr_type = local.is_lh
* AB::Expr::from_canonical_u32(Opcode::LH.instruction_type().0 as u32)
+ local.is_lhu * AB::Expr::from_canonical_u32(Opcode::LHU.instruction_type().0 as u32);
// Both flags are boolean and so is their sum, hence at most one of them is set.
let is_real = local.is_lh + local.is_lhu;
builder.assert_bool(local.is_lh);
builder.assert_bool(local.is_lhu);
builder.assert_bool(is_real.clone());
// Address computation from operands `b` and `c`. A constant zero and the two
// `offset_bit` columns are passed as the three offset-bit inputs.
// NOTE(review): the constant zero is presumably address bit 0 (half-word alignment),
// matching `offset_bit` holding bits 1 and 2 in trace generation -- confirm against
// `AddressOperation`.
let aligned_addr = <AddressOperation<AB::F> as SP1Operation<AB>>::eval(
builder,
AddressOperationInput::new(
local.adapter.b().map(Into::into),
local.adapter.c().map(Into::into),
AB::Expr::zero(),
local.offset_bit[0].into(),
local.offset_bit[1].into(),
is_real.clone(),
local.address_operation,
),
);
// Memory read at `aligned_addr`, timestamped at `clk_low` plus the `Memory` access position.
builder.eval_memory_access_read(
clk_high.clone(),
clk_low.clone() + AB::Expr::from_canonical_u32(MemoryAccessPosition::Memory as u32),
&aligned_addr.clone().map(Into::into),
local.memory_access,
is_real.clone(),
);
// NOTE(review): `op_a_0` presumably flags the first operand being register x0;
// forcing it to zero would then forbid x0 as the destination -- confirm in `ITypeReader`.
builder.assert_zero(local.adapter.op_a_0);
// `selected_half` must equal limb `2 * offset_bit[1] + offset_bit[0]` of the value read:
// one constraint per combination of the two offset bits.
builder
.when_not(local.offset_bit[0])
.when_not(local.offset_bit[1])
.assert_eq(local.selected_half, local.memory_access.prev_value[0]);
builder
.when(local.offset_bit[0])
.when_not(local.offset_bit[1])
.assert_eq(local.selected_half, local.memory_access.prev_value[1]);
builder
.when_not(local.offset_bit[0])
.when(local.offset_bit[1])
.assert_eq(local.selected_half, local.memory_access.prev_value[2]);
builder
.when(local.offset_bit[0])
.when(local.offset_bit[1])
.assert_eq(local.selected_half, local.memory_access.prev_value[3]);
// For `LHU` the MSB column is forced to zero, so the upper limbs written below are zero.
builder.when(local.is_lhu).assert_zero(local.msb.msb);
// MSB operation on `selected_half`, enabled only on `LH` rows.
<U16MSBOperation<AB::F> as SP1Operation<AB>>::eval(
builder,
U16MSBOperationInput::<AB>::new(
local.selected_half.into(),
local.msb,
local.is_lh.into(),
),
);
// CPU state transition: `PC_INC` is added to the first pc limb only, and the clock
// advances by `CLK_INC`.
<CPUState<AB::F> as SP1Operation<AB>>::eval(
builder,
CPUStateInput {
cols: local.state,
next_pc: [
local.state.pc[0] + AB::F::from_canonical_u32(PC_INC),
local.state.pc[1].into(),
local.state.pc[2].into(),
],
clk_increment: AB::Expr::from_canonical_u32(CLK_INC),
is_real: is_real.clone(),
},
);
// I-type register access. The word passed in is `selected_half` in the lowest limb and
// `0xFFFF * msb` in the three upper limbs: sign extension when `msb` is one, zero
// extension when it is zero.
<ITypeReader<AB::F> as SP1Operation<AB>>::eval(
builder,
ITypeReaderInput::new(
clk_high.clone(),
clk_low.clone(),
local.state.pc,
opcode,
[instr_type, base_opcode, funct3, funct7],
Word([
local.selected_half.into(),
AB::Expr::from_canonical_u16(u16::MAX) * local.msb.msb,
AB::Expr::from_canonical_u16(u16::MAX) * local.msb.msb,
AB::Expr::from_canonical_u16(u16::MAX) * local.msb.msb,
]),
local.adapter,
is_real.clone(),
),
);
}
}