sp1_gpu_tracegen/recursion/
mod.rs1mod alu_base;
2mod alu_ext;
3mod convert;
4mod linear_layer;
5mod poseidon2_wide;
6mod prefix_sum_checks;
7mod sbox;
8mod select;
9
10use slop_alloc::mem::CopyError;
11use sp1_gpu_cudart::{DeviceMle, TaskScope};
12use sp1_recursion_machine::RecursionAir;
13
14use crate::{CudaTracegenAir, F};
15
16impl<const DEGREE: usize, const VAR_EVENTS_PER_ROW: usize> CudaTracegenAir<F>
17 for RecursionAir<F, DEGREE, VAR_EVENTS_PER_ROW>
18{
19 fn supports_device_preprocessed_tracegen(&self) -> bool {
20 match self {
21 Self::BaseAlu(chip) => chip.supports_device_preprocessed_tracegen(),
22 Self::ExtAlu(chip) => chip.supports_device_preprocessed_tracegen(),
23 Self::Poseidon2Wide(chip) => chip.supports_device_preprocessed_tracegen(),
24 Self::Poseidon2LinearLayer(chip) => chip.supports_device_preprocessed_tracegen(),
25 Self::Poseidon2SBox(chip) => chip.supports_device_preprocessed_tracegen(),
26 Self::ExtFeltConvert(chip) => chip.supports_device_preprocessed_tracegen(),
27 Self::Select(chip) => chip.supports_device_preprocessed_tracegen(),
28 Self::PrefixSumChecks(chip) => chip.supports_device_preprocessed_tracegen(),
29 Self::PublicValues(_) => false,
30 _ => false,
32 }
33 }
34
35 async fn generate_preprocessed_trace_device(
36 &self,
37 program: &Self::Program,
38 scope: &TaskScope,
39 ) -> Result<Option<DeviceMle<F>>, CopyError> {
40 match self {
41 Self::BaseAlu(chip) => chip.generate_preprocessed_trace_device(program, scope).await,
42 Self::ExtAlu(chip) => chip.generate_preprocessed_trace_device(program, scope).await,
43 Self::Poseidon2Wide(chip) => {
44 chip.generate_preprocessed_trace_device(program, scope).await
45 }
46 Self::Poseidon2LinearLayer(chip) => {
47 chip.generate_preprocessed_trace_device(program, scope).await
48 }
49 Self::Poseidon2SBox(chip) => {
50 chip.generate_preprocessed_trace_device(program, scope).await
51 }
52 Self::ExtFeltConvert(chip) => {
53 chip.generate_preprocessed_trace_device(program, scope).await
54 }
55 Self::Select(chip) => chip.generate_preprocessed_trace_device(program, scope).await,
56 Self::PrefixSumChecks(chip) => {
57 chip.generate_preprocessed_trace_device(program, scope).await
58 }
59 Self::PublicValues(_) => unimplemented!(),
60 _ => unimplemented!(),
62 }
63 }
64
65 fn supports_device_main_tracegen(&self) -> bool {
66 match self {
67 Self::BaseAlu(chip) => chip.supports_device_main_tracegen(),
68 Self::ExtAlu(chip) => chip.supports_device_main_tracegen(),
69 Self::Poseidon2Wide(chip) => chip.supports_device_main_tracegen(),
70 Self::Poseidon2LinearLayer(chip) => chip.supports_device_main_tracegen(),
71 Self::Poseidon2SBox(chip) => chip.supports_device_main_tracegen(),
72 Self::ExtFeltConvert(chip) => chip.supports_device_main_tracegen(),
73 Self::Select(chip) => chip.supports_device_main_tracegen(),
74 Self::PrefixSumChecks(chip) => chip.supports_device_main_tracegen(),
75 Self::PublicValues(_) => false,
76 _ => false,
78 }
79 }
80
81 async fn generate_trace_device(
82 &self,
83 input: &Self::Record,
84 output: &mut Self::Record,
85 scope: &TaskScope,
86 ) -> Result<DeviceMle<F>, CopyError> {
87 match self {
88 Self::BaseAlu(chip) => chip.generate_trace_device(input, output, scope).await,
89 Self::ExtAlu(chip) => chip.generate_trace_device(input, output, scope).await,
90 Self::Poseidon2Wide(chip) => chip.generate_trace_device(input, output, scope).await,
91 Self::Poseidon2LinearLayer(chip) => {
92 chip.generate_trace_device(input, output, scope).await
93 }
94 Self::Poseidon2SBox(chip) => chip.generate_trace_device(input, output, scope).await,
95 Self::ExtFeltConvert(chip) => chip.generate_trace_device(input, output, scope).await,
96 Self::Select(chip) => chip.generate_trace_device(input, output, scope).await,
97 Self::PrefixSumChecks(chip) => chip.generate_trace_device(input, output, scope).await,
98 Self::PublicValues(_) => unimplemented!(),
99 _ => unimplemented!(),
101 }
102 }
103}
104
#[cfg(test)]
pub(crate) mod tests {
    use rand::{rngs::StdRng, SeedableRng};
    use slop_tensor::Tensor;
    use sp1_gpu_cudart::TaskScope;
    use sp1_hypercube::air::MachineAir;
    use sp1_recursion_executor::{
        AnalyzedInstruction, BasicBlock, RawProgram, RecursionProgram, RootProgram, SeqBlock,
    };

    use crate::{CudaTracegenAir, F};

    /// Checks that `chip` produces the same preprocessed trace through
    /// `generate_preprocessed_trace` and through `generate_preprocessed_trace_device`.
    ///
    /// A program consisting of a single basic block of 1000 instructions is built by calling
    /// `make_instr` repeatedly with an RNG seeded with a fixed value, so every run sees the
    /// same RNG stream. Both traces are then handed to `crate::tests::test_traces_eq` together
    /// with the instructions that produced them.
    ///
    /// # Panics
    ///
    /// Panics if either tracegen path yields no trace, or if copying to or from the device
    /// fails.
    pub async fn test_preprocessed_tracegen<A>(
        chip: A,
        mut make_instr: impl FnMut(&mut StdRng) -> AnalyzedInstruction<F>,
        scope: TaskScope,
    ) where
        A: CudaTracegenAir<F> + MachineAir<F, Program = RecursionProgram<F>>,
    {
        let mut rng = StdRng::seed_from_u64(0xDEADBEEF);
        let instrs: Vec<_> = (0..1000).map(|_| make_instr(&mut rng)).collect();

        let root = RootProgram {
            inner: RawProgram { seq_blocks: vec![SeqBlock::Basic(BasicBlock { instrs })] },
            total_memory: 0,
            shape: None,
            event_counts: Default::default(),
        };
        // SAFETY: NOTE(review) — the contract of `new_unchecked` is not visible from this
        // file; presumably it is fine to skip validation because the program is only fed to
        // tracegen and never executed. Confirm against the constructor's documentation.
        let program = unsafe { RecursionProgram::new_unchecked(root) };

        // Reference trace from the non-device path.
        let reference = chip
            .generate_preprocessed_trace(&program)
            .expect("should generate Some(preprocessed_trace)");
        let trace = Tensor::<F>::from(reference);

        // Trace from the device path, copied back to the host for comparison.
        let device_mle = chip
            .generate_preprocessed_trace_device(&program, &scope)
            .await
            .expect("should copy events to device successfully")
            .expect("should generate Some(preprocessed_trace)");
        let gpu_trace =
            device_mle.to_host().expect("should copy trace to host successfully").into_guts();

        // Recover the instructions from the program; it was built above with exactly one
        // basic block, so any other shape is impossible.
        let instrs = match program.into_inner().inner.seq_blocks.pop() {
            Some(SeqBlock::Basic(BasicBlock { instrs })) => instrs,
            _ => unreachable!(),
        };

        crate::tests::test_traces_eq(&trace, &gpu_trace, &instrs);
    }
}