Skip to main content

sp1_gpu_tracegen/recursion/
mod.rs

1mod alu_base;
2mod alu_ext;
3mod convert;
4mod linear_layer;
5mod poseidon2_wide;
6mod prefix_sum_checks;
7mod sbox;
8mod select;
9
10use slop_alloc::mem::CopyError;
11use sp1_gpu_cudart::{DeviceMle, TaskScope};
12use sp1_recursion_machine::RecursionAir;
13
14use crate::{CudaTracegenAir, F};
15
16impl<const DEGREE: usize, const VAR_EVENTS_PER_ROW: usize> CudaTracegenAir<F>
17    for RecursionAir<F, DEGREE, VAR_EVENTS_PER_ROW>
18{
19    fn supports_device_preprocessed_tracegen(&self) -> bool {
20        match self {
21            Self::BaseAlu(chip) => chip.supports_device_preprocessed_tracegen(),
22            Self::ExtAlu(chip) => chip.supports_device_preprocessed_tracegen(),
23            Self::Poseidon2Wide(chip) => chip.supports_device_preprocessed_tracegen(),
24            Self::Poseidon2LinearLayer(chip) => chip.supports_device_preprocessed_tracegen(),
25            Self::Poseidon2SBox(chip) => chip.supports_device_preprocessed_tracegen(),
26            Self::ExtFeltConvert(chip) => chip.supports_device_preprocessed_tracegen(),
27            Self::Select(chip) => chip.supports_device_preprocessed_tracegen(),
28            Self::PrefixSumChecks(chip) => chip.supports_device_preprocessed_tracegen(),
29            Self::PublicValues(_) => false,
30            // Other chips don't have `CudaTracegenAir` implemented yet.
31            _ => false,
32        }
33    }
34
35    async fn generate_preprocessed_trace_device(
36        &self,
37        program: &Self::Program,
38        scope: &TaskScope,
39    ) -> Result<Option<DeviceMle<F>>, CopyError> {
40        match self {
41            Self::BaseAlu(chip) => chip.generate_preprocessed_trace_device(program, scope).await,
42            Self::ExtAlu(chip) => chip.generate_preprocessed_trace_device(program, scope).await,
43            Self::Poseidon2Wide(chip) => {
44                chip.generate_preprocessed_trace_device(program, scope).await
45            }
46            Self::Poseidon2LinearLayer(chip) => {
47                chip.generate_preprocessed_trace_device(program, scope).await
48            }
49            Self::Poseidon2SBox(chip) => {
50                chip.generate_preprocessed_trace_device(program, scope).await
51            }
52            Self::ExtFeltConvert(chip) => {
53                chip.generate_preprocessed_trace_device(program, scope).await
54            }
55            Self::Select(chip) => chip.generate_preprocessed_trace_device(program, scope).await,
56            Self::PrefixSumChecks(chip) => {
57                chip.generate_preprocessed_trace_device(program, scope).await
58            }
59            Self::PublicValues(_) => unimplemented!(),
60            // Other chips don't have `CudaTracegenAir` implemented yet.
61            _ => unimplemented!(),
62        }
63    }
64
65    fn supports_device_main_tracegen(&self) -> bool {
66        match self {
67            Self::BaseAlu(chip) => chip.supports_device_main_tracegen(),
68            Self::ExtAlu(chip) => chip.supports_device_main_tracegen(),
69            Self::Poseidon2Wide(chip) => chip.supports_device_main_tracegen(),
70            Self::Poseidon2LinearLayer(chip) => chip.supports_device_main_tracegen(),
71            Self::Poseidon2SBox(chip) => chip.supports_device_main_tracegen(),
72            Self::ExtFeltConvert(chip) => chip.supports_device_main_tracegen(),
73            Self::Select(chip) => chip.supports_device_main_tracegen(),
74            Self::PrefixSumChecks(chip) => chip.supports_device_main_tracegen(),
75            Self::PublicValues(_) => false,
76            // Other chips don't have `CudaTracegenAir` implemented yet.
77            _ => false,
78        }
79    }
80
81    async fn generate_trace_device(
82        &self,
83        input: &Self::Record,
84        output: &mut Self::Record,
85        scope: &TaskScope,
86    ) -> Result<DeviceMle<F>, CopyError> {
87        match self {
88            Self::BaseAlu(chip) => chip.generate_trace_device(input, output, scope).await,
89            Self::ExtAlu(chip) => chip.generate_trace_device(input, output, scope).await,
90            Self::Poseidon2Wide(chip) => chip.generate_trace_device(input, output, scope).await,
91            Self::Poseidon2LinearLayer(chip) => {
92                chip.generate_trace_device(input, output, scope).await
93            }
94            Self::Poseidon2SBox(chip) => chip.generate_trace_device(input, output, scope).await,
95            Self::ExtFeltConvert(chip) => chip.generate_trace_device(input, output, scope).await,
96            Self::Select(chip) => chip.generate_trace_device(input, output, scope).await,
97            Self::PrefixSumChecks(chip) => chip.generate_trace_device(input, output, scope).await,
98            Self::PublicValues(_) => unimplemented!(),
99            // Other chips don't have `CudaTracegenAir` implemented yet.
100            _ => unimplemented!(),
101        }
102    }
103}
104
/// Shared test helpers for the recursion chips' device trace generation.
#[cfg(test)]
pub(crate) mod tests {
    use rand::{rngs::StdRng, SeedableRng};
    use slop_tensor::Tensor;
    use sp1_gpu_cudart::TaskScope;
    use sp1_hypercube::air::MachineAir;
    use sp1_recursion_executor::{
        AnalyzedInstruction, BasicBlock, RawProgram, RecursionProgram, RootProgram, SeqBlock,
    };

    use crate::{CudaTracegenAir, F};

    /// Checks a chip's device preprocessed trace against its host preprocessed trace.
    ///
    /// `make_instr` is called a fixed number of times with a deterministically seeded RNG to
    /// build a single-basic-block program. The preprocessed trace is generated once through
    /// `MachineAir::generate_preprocessed_trace` and once through
    /// `CudaTracegenAir::generate_preprocessed_trace_device` (then copied back to the host),
    /// and both results are handed to `crate::tests::test_traces_eq` together with the
    /// generated instructions.
    ///
    /// # Panics
    ///
    /// Panics if either path yields no preprocessed trace, or if the device path reports an
    /// error while producing the trace or copying it back to the host.
    pub async fn test_preprocessed_tracegen<A>(
        chip: A,
        mut make_instr: impl FnMut(&mut StdRng) -> AnalyzedInstruction<F>,
        scope: TaskScope,
    ) where
        A: CudaTracegenAir<F> + MachineAir<F, Program = RecursionProgram<F>>,
    {
        /// Number of instructions placed in the generated program.
        const INSTR_COUNT: usize = 1000;

        let mut seeded_rng = StdRng::seed_from_u64(0xDEADBEEF);
        let generated: Vec<_> = (0..INSTR_COUNT).map(|_| make_instr(&mut seeded_rng)).collect();

        let root = RootProgram {
            inner: RawProgram { seq_blocks: vec![SeqBlock::Basic(BasicBlock { instrs: generated })] },
            total_memory: 0, // Will be filled in.
            shape: None,
            event_counts: Default::default(),
        };

        // SAFETY: We don't actually execute the program, which requires that the invariants hold.
        // We only generate preprocessed traces, which do not require that the invariants hold.
        let program = unsafe { RecursionProgram::new_unchecked(root) };

        // Host reference trace; generated before the device trace.
        let host_rows = chip
            .generate_preprocessed_trace(&program)
            .expect("should generate Some(preprocessed_trace)");
        let trace = Tensor::<F>::from(host_rows);

        // Device trace, copied back to the host for comparison.
        let gpu_trace = chip
            .generate_preprocessed_trace_device(&program, &scope)
            .await
            .expect("should copy events to device successfully")
            .expect("should generate Some(preprocessed_trace)")
            .to_host()
            .expect("should copy trace to host successfully")
            .into_guts();

        // Take the instructions back out of the program; it was built above with exactly one
        // basic block, so any other shape is impossible.
        let instrs = match program.into_inner().inner.seq_blocks.pop() {
            Some(SeqBlock::Basic(BasicBlock { instrs })) => instrs,
            _ => unreachable!(),
        };

        crate::tests::test_traces_eq(&trace, &gpu_trace, &instrs);
    }
}