Skip to main content

sp1_core_executor/
program.rs

1//! Programs that can be executed by the SP1 zkVM.
2
3use std::{fs::File, io::Read, str::FromStr};
4
5use crate::{
6    disassembler::{transpile, Elf},
7    instruction::Instruction,
8    RiscvAirId,
9};
10use hashbrown::HashMap;
11use serde::{Deserialize, Serialize};
12use slop_algebra::{Field, PrimeField32};
13use slop_maybe_rayon::prelude::{IntoParallelIterator, ParallelBridge, ParallelIterator};
14use sp1_hypercube::{
15    air::{MachineAir, MachineProgram},
16    septic_curve::{SepticCurve, SepticCurveComplete},
17    septic_digest::SepticDigest,
18    shape::Shape,
19    InteractionKind, UntrustedConfig,
20};
21use sp1_primitives::consts::split_page_idx;
22use std::sync::Arc;
23
24#[cfg(feature = "mprotect")]
25use sp1_hypercube::addr_to_limbs;
26
/// Upper bound on the number of instructions a single [`Program`] may hold (2^22).
pub const MAX_PROGRAM_SIZE: usize = 2_usize.pow(22);
29
30/// A program that can be executed by the SP1 zkVM.
31///
32/// Contains a series of instructions along with the initial memory image. It also contains the
33/// start address and base address of the program.
34#[derive(Debug, Clone, Default, Serialize, Deserialize, deepsize2::DeepSizeOf)]
35pub struct Program {
36    /// The instructions of the program.
37    pub instructions: Vec<Instruction>,
38    /// The encoded instructions of the program. Only used if program is untrusted
39    pub instructions_encoded: Option<Vec<u32>>,
40    /// The start address of the program. It is absolute, meaning not relative to `pc_base`.
41    pub pc_start_abs: u64,
42    /// The base address of the program.
43    pub pc_base: u64,
44    /// The trap context address of the program.
45    pub trap_context: Option<u64>,
46    /// The initial page protection image, mapping page indices to protection flags.
47    pub page_prot_image: HashMap<u64, u8>,
48    /// The initial memory image, useful for global constants
49    pub memory_image: Arc<HashMap<u64, u64>>,
50    /// The shape for the preprocessed tables.
51    pub preprocessed_shape: Option<Shape<RiscvAirId>>,
52    /// Flag indicating if untrusted programs are allowed.
53    pub enable_untrusted_programs: bool,
54    /// Function symbols for profiling & debugging. In the form of (name, start address, size)
55    pub function_symbols: Vec<(String, u64, u64)>,
56    /// The memory region where untrusted program could live in. It is also the
57    /// memory region mprotect works on.
58    pub untrusted_memory: Option<(u64, u64)>,
59    /// The profiler stack from a dump-elf/bootloader session.
60    pub dump_elf_stack: Vec<u64>,
61}
62
63impl Program {
64    /// Create a new [Program].
65    #[must_use]
66    pub fn new(instructions: Vec<Instruction>, pc_start_abs: u64, pc_base: u64) -> Self {
67        assert!(!instructions.is_empty(), "empty program not supported");
68        assert!(instructions.len() <= (1 << 22), "program has too many instructions");
69
70        Self {
71            instructions,
72            instructions_encoded: None,
73            pc_start_abs,
74            pc_base,
75            trap_context: None,
76            page_prot_image: HashMap::new(),
77            memory_image: Arc::new(HashMap::new()),
78            preprocessed_shape: None,
79            enable_untrusted_programs: false,
80            untrusted_memory: None,
81            dump_elf_stack: Vec::new(),
82            function_symbols: Vec::new(),
83        }
84    }
85
86    /// Disassemble a RV64IM ELF to a program that be executed by the VM.
87    ///
88    /// # Errors
89    ///
90    /// This function may return an error if the ELF is not valid.
91    pub fn from(input: &[u8]) -> eyre::Result<Self> {
92        // Decode the bytes as an ELF.
93        let elf = Elf::decode(input)?;
94
95        if elf.pc_base < 32 {
96            eyre::bail!("elf with pc_base < 32 is not supported");
97        }
98        if elf.pc_base % 4 != 0 {
99            eyre::bail!("elf with pc_base not a multiple of 4 is not supported");
100        }
101
102        // Transpile the RV64IM instructions.
103        let instruction_pair = transpile(&elf.instructions, false);
104        let (instructions, instructions_encoded): (Vec<Instruction>, Vec<u32>) =
105            instruction_pair.into_iter().unzip();
106
107        if instructions.is_empty() {
108            eyre::bail!("empty elf not supported");
109        }
110        if instructions.len() > (1 << 22) {
111            eyre::bail!("elf has too many instructions");
112        }
113
114        let enable_untrusted_programs = elf.untrusted_memory.is_some();
115        // Return the program.
116        Ok(Program {
117            instructions,
118            instructions_encoded: Some(instructions_encoded),
119            pc_start_abs: elf.pc_start,
120            pc_base: elf.pc_base,
121            trap_context: elf.trap_context,
122            memory_image: elf.memory_image,
123            page_prot_image: elf.page_prot_image,
124            preprocessed_shape: None,
125            enable_untrusted_programs,
126            function_symbols: elf.function_symbols,
127            untrusted_memory: elf.untrusted_memory,
128            dump_elf_stack: elf.dump_elf_stack,
129        })
130    }
131
132    /// Disassemble a RV64IM ELF to a program that be executed by the VM from a file path.
133    ///
134    /// # Errors
135    ///
136    /// This function will return an error if the file cannot be opened or read.
137    pub fn from_elf(path: &str) -> eyre::Result<Self> {
138        let mut elf_code = Vec::new();
139        File::open(path)?.read_to_end(&mut elf_code)?;
140        Program::from(&elf_code)
141    }
142
143    /// Custom logic for padding the trace to a power of two according to the proof shape.
144    pub fn fixed_log2_rows<F: Field, A: MachineAir<F>>(&self, air: &A) -> Option<usize> {
145        let id = RiscvAirId::from_str(air.name()).unwrap();
146        self.preprocessed_shape.as_ref().map(|shape| {
147            shape
148                .log2_height(&id)
149                .unwrap_or_else(|| panic!("Chip {} not found in specified shape", air.name()))
150        })
151    }
152
153    #[must_use]
154    /// Fetch the instruction at the given program counter.
155    pub fn fetch(&self, pc: u64) -> Option<&Instruction> {
156        let idx = ((pc - self.pc_base) / 4) as usize;
157        self.instructions.get(idx)
158    }
159}
160
161impl<F: PrimeField32> MachineProgram<F> for Program {
162    fn pc_start(&self) -> [F; 3] {
163        [
164            F::from_canonical_u16((self.pc_start_abs & 0xFFFF) as u16),
165            F::from_canonical_u16(((self.pc_start_abs >> 16) & 0xFFFF) as u16),
166            F::from_canonical_u16(((self.pc_start_abs >> 32) & 0xFFFF) as u16),
167        ]
168    }
169
170    fn initial_global_cumulative_sum(&self) -> SepticDigest<F> {
171        let mut memory_digests: Vec<SepticCurveComplete<F>> = self
172            .memory_image
173            .iter()
174            .par_bridge()
175            .map(|(&addr, &word)| {
176                let limb_1 = (word & 0xFFFF) as u32 + (1 << 16) * ((word >> 32) & 0xFF) as u32;
177                let limb_2 =
178                    ((word >> 16) & 0xFFFF) as u32 + (1 << 16) * ((word >> 40) & 0xFF) as u32;
179                let values = [
180                    (InteractionKind::Memory as u32) << 24,
181                    0,
182                    (addr & 0xFFFF) as u32,
183                    ((addr >> 16) & 0xFFFF) as u32,
184                    ((addr >> 32) & 0xFFFF) as u32,
185                    limb_1,
186                    limb_2,
187                    ((word >> 48) & 0xFFFF) as u32,
188                ];
189                let (point, _, _, _) =
190                    SepticCurve::<F>::lift_x(values.map(|x| F::from_canonical_u32(x)));
191                SepticCurveComplete::Affine(point.neg())
192            })
193            .collect();
194
195        if self.enable_untrusted_programs {
196            let page_prot_digests: Vec<SepticCurveComplete<F>> = self
197                .page_prot_image
198                .iter()
199                .par_bridge()
200                .map(|(&page_idx, &page_prot)| {
201                    // Use exact same encoding as PageProtGlobalChip Initialize events
202                    let page_idx_limbs = split_page_idx(page_idx);
203                    let values = [
204                        (InteractionKind::PageProtAccess as u32) << 24,
205                        0,
206                        page_idx_limbs[0].into(),
207                        page_idx_limbs[1].into(),
208                        page_idx_limbs[2].into(),
209                        page_prot.into(),
210                        0,
211                        0,
212                    ];
213                    let (point, _, _, _) =
214                        SepticCurve::<F>::lift_x(values.map(|x| F::from_canonical_u32(x)));
215                    SepticCurveComplete::Affine(point.neg())
216                })
217                .collect();
218
219            // Combine both memory and page protection contributions.
220            memory_digests.extend(page_prot_digests);
221        }
222
223        memory_digests.push(SepticCurveComplete::Affine(SepticDigest::<F>::zero().0));
224        SepticDigest(
225            memory_digests
226                .into_par_iter()
227                .reduce(|| SepticCurveComplete::Infinity, |a, b| a + b)
228                .point(),
229        )
230    }
231
232    fn untrusted_config(&self) -> UntrustedConfig<F> {
233        UntrustedConfig {
234            enable_untrusted_programs: F::from_bool(self.enable_untrusted_programs),
235            #[cfg(feature = "mprotect")]
236            enable_trap_handler: F::from_bool(self.trap_context.is_some()),
237            #[cfg(feature = "mprotect")]
238            trap_context: self.trap_context.map_or([[F::zero(); 3]; 3], |addr| {
239                [addr_to_limbs(addr), addr_to_limbs(addr + 8), addr_to_limbs(addr + 16)]
240            }),
241            #[cfg(feature = "mprotect")]
242            untrusted_memory: self.untrusted_memory.map_or([[F::zero(); 3]; 2], |(start, end)| {
243                [addr_to_limbs(start), addr_to_limbs(end)]
244            }),
245        }
246    }
247}