// sp1_jit/risc.rs
1use std::{marker::PhantomData, sync::Arc};
2
3use memmap2::Mmap;
4use serde::{Deserialize, Serialize};
5
/// The 32 general-purpose registers, `X0` through `X31`.
///
/// Each variant's discriminant equals its register index, so a register can be
/// turned into its index with `reg as u8` (the `#[repr(u8)]` guarantees the
/// discriminant fits in a byte).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum RiscRegister {
    X0 = 0,
    X1 = 1,
    X2 = 2,
    X3 = 3,
    X4 = 4,
    X5 = 5,
    X6 = 6,
    X7 = 7,
    X8 = 8,
    X9 = 9,
    X10 = 10,
    X11 = 11,
    X12 = 12,
    X13 = 13,
    X14 = 14,
    X15 = 15,
    X16 = 16,
    X17 = 17,
    X18 = 18,
    X19 = 19,
    X20 = 20,
    X21 = 21,
    X22 = 22,
    X23 = 23,
    X24 = 24,
    X25 = 25,
    X26 = 26,
    X27 = 27,
    X28 = 28,
    X29 = 29,
    X30 = 30,
    X31 = 31,
}
42
43impl RiscRegister {
44    pub fn all_registers() -> &'static [RiscRegister] {
45        &[
46            RiscRegister::X0,
47            RiscRegister::X1,
48            RiscRegister::X2,
49            RiscRegister::X3,
50            RiscRegister::X4,
51            RiscRegister::X5,
52            RiscRegister::X6,
53            RiscRegister::X7,
54            RiscRegister::X8,
55            RiscRegister::X9,
56            RiscRegister::X10,
57            RiscRegister::X11,
58            RiscRegister::X12,
59            RiscRegister::X13,
60            RiscRegister::X14,
61            RiscRegister::X15,
62            RiscRegister::X16,
63            RiscRegister::X17,
64            RiscRegister::X18,
65            RiscRegister::X19,
66            RiscRegister::X20,
67            RiscRegister::X21,
68            RiscRegister::X22,
69            RiscRegister::X23,
70            RiscRegister::X24,
71            RiscRegister::X25,
72            RiscRegister::X26,
73            RiscRegister::X27,
74            RiscRegister::X28,
75            RiscRegister::X29,
76            RiscRegister::X30,
77            RiscRegister::X31,
78        ]
79    }
80}
81
/// ALU operations can either have register or immediate operands.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RiscOperand {
    /// The operand is read from a register.
    Register(RiscRegister),
    /// The operand is a signed 32-bit immediate encoded in the instruction.
    Immediate(i32),
}
88
89impl From<RiscRegister> for RiscOperand {
90    fn from(reg: RiscRegister) -> Self {
91        RiscOperand::Register(reg)
92    }
93}
94
95impl From<u32> for RiscOperand {
96    fn from(imm: u32) -> Self {
97        RiscOperand::Immediate(imm as i32)
98    }
99}
100
101impl From<i32> for RiscOperand {
102    fn from(imm: i32) -> Self {
103        RiscOperand::Immediate(imm)
104    }
105}
106
107impl From<u64> for RiscOperand {
108    fn from(imm: u64) -> Self {
109        RiscOperand::Immediate(imm as i32)
110    }
111}
112
/// A single recorded memory value, paired with the clock at which it was observed.
///
/// `#[repr(C)]` gives a stable layout, and every bit pattern is a valid
/// `MemValue` — the raw-buffer readers below rely on both properties.
#[repr(C)]
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MemValue {
    /// Clock associated with this memory access.
    pub clk: u64,
    /// The 64-bit value that was read.
    pub value: u64,
}
119
/// A convenience structure for getting offsets of fields in the actual [TraceChunk].
///
/// Field offsets are taken via `offset_of!` by the raw readers, so this must
/// stay layout-stable — hence `#[repr(C)]`.
#[repr(C)]
pub struct TraceChunkHeader {
    /// Values of all 32 registers at the start of the chunk.
    pub start_registers: [u64; 32],
    /// Program counter at the start of the chunk.
    pub pc_start: u64,
    /// Clock value at the start of the chunk.
    pub clk_start: u64,
    /// Clock value at the end of the chunk.
    pub clk_end: u64,
    /// Number of [`MemValue`]s stored directly after this header.
    pub num_mem_reads: u64,
}
129
/// A trace chunk backed directly by a memory-mapped buffer.
///
/// Cloning is cheap: clones share the underlying [`Mmap`] through the [`Arc`].
// NOTE(review): `#[repr(C)]` on a single-field wrapper around `Arc<Mmap>` has no
// practical layout guarantee (`Arc` itself is not `repr(C)`) — confirm intent.
#[repr(C)]
#[derive(Clone)]
pub struct TraceChunkRaw(Arc<Mmap>);
133
134impl TraceChunkRaw {
135    /// # Safety
136    ///
137    /// - The mmap must be a valid [`TraceChunkHeader`].
138    /// - The mmap must contain valid [`MemValue`]s in after the header.
139    /// - The `num_mem_reads` must be the number of [`MemValue`]s in the mmap after the header.
140    pub unsafe fn new(inner: Mmap) -> Self {
141        Self(Arc::new(inner))
142    }
143}
144
145impl MinimalTrace for TraceChunkRaw {
146    fn start_registers(&self) -> [u64; 32] {
147        let offset = std::mem::offset_of!(TraceChunkHeader, start_registers);
148
149        unsafe { std::ptr::read_unaligned(self.0.as_ptr().add(offset) as *const [u64; 32]) }
150    }
151
152    fn pc_start(&self) -> u64 {
153        let offset = std::mem::offset_of!(TraceChunkHeader, pc_start);
154
155        unsafe { std::ptr::read_unaligned(self.0.as_ptr().add(offset) as *const u64) }
156    }
157
158    fn clk_start(&self) -> u64 {
159        let offset = std::mem::offset_of!(TraceChunkHeader, clk_start);
160
161        unsafe { std::ptr::read_unaligned(self.0.as_ptr().add(offset) as *const u64) }
162    }
163
164    fn clk_end(&self) -> u64 {
165        let offset = std::mem::offset_of!(TraceChunkHeader, clk_end);
166
167        unsafe { std::ptr::read_unaligned(self.0.as_ptr().add(offset) as *const u64) }
168    }
169
170    fn num_mem_reads(&self) -> u64 {
171        let offset = std::mem::offset_of!(TraceChunkHeader, num_mem_reads);
172
173        unsafe { std::ptr::read_unaligned(self.0.as_ptr().add(offset) as *const u64) }
174    }
175
176    fn mem_reads(&self) -> MemReads<'_> {
177        let header_end = std::mem::size_of::<TraceChunkHeader>();
178        let len = self.num_mem_reads() as usize;
179
180        debug_assert!(self.0.len() - header_end >= len);
181
182        // SAFETY:
183        // - The memory is valid assuming num_mem_reads is correct.
184        // - The memory is technically always valid for reads since all bitpatterns are valid for
185        //   `MemValue`.
186        unsafe { MemReads::new(self.0.as_ptr().add(header_end) as *const MemValue, len) }
187    }
188}
189
/// A streaming iterator over the [`MemValue`]s of a trace chunk.
///
/// Internally a (cursor, end) pointer pair over a borrowed buffer.
pub struct MemReads<'a> {
    /// Cursor pointing at the next `MemValue` to yield.
    inner: *const MemValue,
    /// One-past-the-end of the buffer; iteration stops when `inner == end`.
    end: *const MemValue,
    /// Capture the lifetime of the buffer for safety reasons.
    _phantom: PhantomData<&'a ()>,
}
196
197impl<'a> MemReads<'a> {
198    /// # Safety
199    ///
200    /// - The underlying memory is valid and contains valid `MemValue`s.
201    /// - The length is the number of `MemValue`s in the underlying memory.
202    pub(crate) unsafe fn new(inner: *const MemValue, len: usize) -> Self {
203        debug_assert!(inner.is_aligned(), "MemReads ptr is not aligned");
204
205        Self { inner, end: inner.add(len), _phantom: PhantomData }
206    }
207
208    /// Advance the pointer by `n` elements.
209    ///
210    /// # Panics
211    ///
212    /// Panics if `n` is greater than the purported length of the underlying buffer.
213    pub fn advance(&mut self, n: usize) {
214        unsafe {
215            let advanced = self.inner.add(n);
216
217            if advanced > self.end {
218                panic!("Cannot advance by more than the length of the slice");
219            }
220
221            self.inner = advanced;
222        }
223    }
224
225    /// Get the raw pointer to the head of the slice.
226    pub fn head_raw(&self) -> *const MemValue {
227        self.inner
228    }
229
230    /// The remaining length of the slice from our current position.
231    #[must_use]
232    pub fn len(&self) -> usize {
233        unsafe { self.end.offset_from_unsigned(self.inner) }
234    }
235
236    /// Check if the iterator is empty.
237    #[must_use]
238    pub fn is_empty(&self) -> bool {
239        self.inner == self.end
240    }
241}
242
243impl<'a> Iterator for MemReads<'a> {
244    type Item = MemValue;
245
246    fn next(&mut self) -> Option<Self::Item> {
247        if self.inner == self.end {
248            None
249        } else {
250            let value = unsafe { std::ptr::read(self.inner) };
251            self.inner = unsafe { self.inner.add(1) };
252
253            Some(value)
254        }
255    }
256}
257
/// A trace chunk is all the data needed to continue the execution of a program at
/// pc_start/clk_start.
///
/// We transmute this type directly from bytes; the buffer should be laid out as a
/// [TraceChunkHeader], followed by a packed slice of the memory reads.
///
/// When we read this type from the buffer, we will copy the registers, the pc/clk start and end,
/// and the memory reads, whose count comes from the header's `num_mem_reads` field.
///
/// The fields should be placed in the buffer according to the layout of [TraceChunkHeader].
#[repr(C)]
#[derive(Default, Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct TraceChunk {
    /// Register file snapshot at `clk_start`.
    pub start_registers: [u64; 32],
    /// Program counter at the start of the chunk.
    pub pc_start: u64,
    /// Clock at the start of the chunk.
    pub clk_start: u64,
    /// Clock at the end of the chunk.
    pub clk_end: u64,
    /// Recorded memory reads; `Arc<[_]>` makes clones O(1).
    #[serde(serialize_with = "ser::serialize_mem_reads")]
    #[serde(deserialize_with = "ser::deserialize_mem_reads")]
    pub mem_reads: Arc<[MemValue]>,
}
279
280impl From<TraceChunkRaw> for TraceChunk {
281    fn from(raw: TraceChunkRaw) -> Self {
282        TraceChunk::copy_from_bytes(raw.0.as_ref())
283    }
284}
285
impl TraceChunk {
    /// Copy the bytes into a [TraceChunk]. We don't just back it with the original bytes,
    /// since this type is likely to be sent off to a worker for proving.
    ///
    /// # Panics
    ///
    /// This method will panic if the buffer is not large enough,
    /// or the number of reads causes an overflow.
    pub fn copy_from_bytes(src: &[u8]) -> Self {
        const HDR: usize = size_of::<TraceChunkHeader>();

        /* ---------- 1. header must fit ---------- */
        if src.len() < HDR {
            panic!("TraceChunk header too small");
        }

        /* ---------- 2. copy-out the header ---------- */
        // SAFETY:
        // We just checked that `src` contains at least `HDR` bytes, and
        // `read_unaligned` places no alignment requirement on the source pointer.
        //
        // Note: All bit patterns are valid for `TraceChunkHeader`.
        let raw: TraceChunkHeader =
            unsafe { core::ptr::read_unaligned(src.as_ptr() as *const TraceChunkHeader) };

        /* ---------- 3. tail must fit ---------- */
        // Checked arithmetic so a hostile/corrupt `num_mem_reads` panics
        // instead of wrapping and passing the length check.
        let n_words = raw.num_mem_reads as usize;
        let n_bytes = n_words.checked_mul(size_of::<MemValue>()).expect("Num mem reads too large");
        let total = HDR.checked_add(n_bytes).expect("Num mem reads too large");
        if src.len() < total {
            panic!("TraceChunk tail too small");
        }

        /* ---------- 4. extract tail ---------- */
        let tail = &src[HDR..total]; // only after the length check

        let mem_reads = Arc::new_uninit_slice(n_words);

        // SAFETY:
        // - The tail contains valid u64s, so doing a bitwise copy preserves the validity and
        //   endianness.
        // - tail is likely unaligned, so casting to a u8 pointer gives the alignment guarantee the
        //   compiler needs to do a copy.
        // - `mem_reads` was just allocated to have enough space.
        // - u8 has minimum alignment, so casting the pointer allocated by the vec is valid.
        // - The cast from const -> mut is valid since there are no other references to the memory.
        //
        // This trick is mostly taken from [`std::ptr::read_unaligned`]
        // see: <https://doc.rust-lang.org/src/core/ptr/mod.rs.html#1811>.
        unsafe {
            std::ptr::copy_nonoverlapping(tail.as_ptr(), mem_reads.as_ptr() as *mut u8, n_bytes)
        };

        Self {
            start_registers: raw.start_registers,
            pc_start: raw.pc_start,
            clk_start: raw.clk_start,
            clk_end: raw.clk_end,
            // SAFETY: We know the memory is initialized, so we can assume it.
            mem_reads: unsafe { mem_reads.assume_init() },
        }
    }
}
348
/// A trait that represents a minimal trace.
///
/// A minimal trace is the minimum required information to re-execute from
/// `pc_start` and `clk_start` -> `clk_end`.
///
/// It effectively acts as an oracle for the results of memory read operations.
pub trait MinimalTrace: Clone + Send + Sync + 'static {
    /// Register values at the start of the trace.
    fn start_registers(&self) -> [u64; 32];

    /// Program counter at the start of the trace.
    fn pc_start(&self) -> u64;

    /// Clock at the start of the trace.
    fn clk_start(&self) -> u64;

    /// Clock at the end of the trace.
    fn clk_end(&self) -> u64;

    /// Number of memory reads recorded in the trace.
    fn num_mem_reads(&self) -> u64;

    /// Iterator over the recorded memory reads.
    fn mem_reads(&self) -> MemReads<'_>;
}
368
369impl MinimalTrace for TraceChunk {
370    fn start_registers(&self) -> [u64; 32] {
371        self.start_registers
372    }
373
374    fn pc_start(&self) -> u64 {
375        self.pc_start
376    }
377
378    fn clk_start(&self) -> u64 {
379        self.clk_start
380    }
381
382    fn clk_end(&self) -> u64 {
383        self.clk_end
384    }
385
386    fn num_mem_reads(&self) -> u64 {
387        self.mem_reads.len() as u64
388    }
389
390    fn mem_reads(&self) -> MemReads<'_> {
391        // SAFETY:
392        // - The memory is technically always valid for reads since all bitpatterns are valid for
393        //   `MemValue`.
394        // - the length comes directly from the Vec, which we know to be valid.
395        unsafe { MemReads::new(self.mem_reads.as_ptr(), self.mem_reads.len()) }
396    }
397}
398
399mod ser {
400    use super::*;
401    use serde::{Deserializer, Serializer};
402
403    pub fn serialize_mem_reads<S: Serializer>(
404        mem_reads: &Arc<[MemValue]>,
405        serializer: S,
406    ) -> Result<S::Ok, S::Error> {
407        let as_vec: Vec<MemValue> = Vec::from(&mem_reads[..]);
408
409        Vec::serialize(&as_vec, serializer)
410    }
411
412    pub fn deserialize_mem_reads<'a, D: Deserializer<'a>>(
413        deserializer: D,
414    ) -> Result<Arc<[MemValue]>, D::Error> {
415        let as_vec = Vec::deserialize(deserializer)?;
416
417        Ok(as_vec.into())
418    }
419
420    #[test]
421    #[cfg(test)]
422    fn test_mem_reads() {
423        let mem_reads = Arc::new([MemValue { clk: 0, value: 0 }, MemValue { clk: 1, value: 1 }]);
424        let trace = TraceChunk {
425            start_registers: [5; 32],
426            pc_start: 6,
427            clk_start: 7,
428            clk_end: 8,
429            mem_reads,
430        };
431
432        let serialized = bincode::serialize(&trace).unwrap();
433        let deserialized = bincode::deserialize(&serialized).unwrap();
434
435        assert_eq!(trace, deserialized);
436    }
437}