Skip to main content

hopper_native/
raw_input.rs

1//! Raw loader input parsing for Hopper Native.
2//!
3//! This is the single source of truth for Solana loader input decoding. It owns
4//! duplicate-account resolution, canonical-account lookup, and original-index
5//! tracking so higher layers operate on already-resolved account views.
6
7use core::mem::MaybeUninit;
8
9use crate::account_view::AccountView;
10use crate::address::Address;
11use crate::raw_account::RuntimeAccount;
12use crate::MAX_PERMITTED_DATA_INCREASE;
13
/// Byte boundary the parsers in this file round up to after each canonical
/// account's data region plus realloc reserve (described elsewhere in this
/// file as the "u128 alignment padding").
const BPF_ALIGN_OF_U128: usize = 8;
15
/// Abort on a duplicate marker that does not name an earlier account slot.
///
/// The Solana loader guarantees duplicate markers refer only to **earlier**
/// account slots: Solana's account serialization documents the marker as
/// "the index of the first account it is a duplicate of", which is
/// necessarily a lower index. A marker that points at the current slot or a
/// later one therefore cannot come from a well-formed invocation; it is
/// either a loader bug or adversarial input trying to synthesize an aliasing
/// `AccountView`.
///
/// Before the Hopper Safety Audit the parser silently fell back to account
/// zero (or null for slot 0), yielding either a null-pointer `AccountView`
/// or a view aliasing an unrelated account. The audit flagged that as the
/// most urgent must-fix, so the parser now traps immediately: via
/// `sol_panic_` on Solana, via `panic!` everywhere else. The transaction
/// fails at parse time.
///
/// `marker` is the offending marker byte and `slot` is the loader slot it
/// was found in. This function never returns.
#[cold]
#[inline(never)]
pub(crate) fn malformed_duplicate_marker(marker: u8, slot: usize) -> ! {
    #[cfg(target_os = "solana")]
    {
        // Keep the message short and on-chain-cheap. The loader log
        // attaches the program id automatically.
        const MSG: &[u8] = b"hopper: malformed duplicate marker";
        // The slot and marker are passed in the two trailing numeric
        // arguments of the syscall so they show up in the log.
        unsafe {
            crate::syscalls::sol_panic_(
                MSG.as_ptr(),
                MSG.len() as u64,
                slot as u64,
                u64::from(marker),
            );
        }
    }
    #[cfg(not(target_os = "solana"))]
    {
        panic!(
            "hopper: malformed duplicate marker at slot {}: marker {} points forward",
            slot, marker
        );
    }
}
47
/// Bookkeeping for a single account slot found in the loader input.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct RawAccountIndex {
    /// Position of this slot in the loader's original account array.
    pub original_index: usize,
    /// For a duplicated slot, the canonical account index it resolves to;
    /// `None` otherwise.
    pub duplicate_of: Option<usize>,
}

impl RawAccountIndex {
    /// Returns `true` when this slot is a duplicate reference to an earlier
    /// account, i.e. when `duplicate_of` is populated.
    #[inline(always)]
    pub const fn is_duplicate(&self) -> bool {
        matches!(self.duplicate_of, Some(_))
    }
}
64
/// Instruction tail discovered after scanning the loader input buffer.
///
/// Produced by `scan_instruction_frame`, which walks every account record in
/// the buffer to find where the instruction data and program id begin.
#[derive(Clone)]
pub struct RawInstructionFrame {
    /// Pointer to the first account record, i.e. the byte right after the
    /// leading `u64` account count.
    pub accounts_start: *mut u8,
    /// Account count read from the buffer header, clamped to at most 254 by
    /// `scan_instruction_frame`.
    pub account_count: usize,
    /// Instruction data bytes that follow the account records; this slice
    /// points directly into the loader input buffer.
    pub instruction_data: &'static [u8],
    /// Program id copied from the 32 bytes that follow the instruction data.
    pub program_id: Address,
}
73
74/// Deserialize the loader input into `AccountView`s.
75///
76/// Duplicate-account resolution happens here. A duplicate slot reuses the
77/// canonical `RuntimeAccount` pointer of the earlier slot it references, and
78/// its `original_index` remains the loader slot where it appeared.
79///
80/// # Safety
81///
82/// `input` must point to a valid Solana BPF input buffer.
83#[inline(always)]
84pub unsafe fn deserialize_accounts<const MAX: usize>(
85    input: *mut u8,
86    accounts: &mut [MaybeUninit<AccountView>; MAX],
87) -> (Address, usize, &'static [u8]) {
88    let frame = unsafe { scan_instruction_frame(input) };
89
90    let mut offset = 8usize;
91    let count = frame.account_count.min(MAX);
92
93    let mut slot = 0usize;
94    while slot < count {
95        let marker = unsafe { *input.add(offset) };
96        if marker == u8::MAX {
97            let raw = unsafe { input.add(offset) as *mut RuntimeAccount };
98            accounts[slot] = MaybeUninit::new(unsafe { AccountView::new_unchecked(raw) });
99
100            let data_len = unsafe { (*raw).data_len as usize };
101            offset += RuntimeAccount::SIZE;
102            offset += data_len + MAX_PERMITTED_DATA_INCREASE;
103            offset += unsafe { input.add(offset).align_offset(BPF_ALIGN_OF_U128) };
104            offset += 8;
105        } else {
106            let duplicate_of = marker as usize;
107            // The marker must refer strictly to an earlier slot. Anything
108            // else (forward reference, or a duplicate marker on slot 0
109            // which has no prior slot to reference) is malformed loader
110            // input. we trap rather than synthesize a null or aliasing
111            // `AccountView`.
112            if duplicate_of >= slot {
113                malformed_duplicate_marker(marker, slot);
114            }
115            let raw = unsafe { accounts[duplicate_of].assume_init_ref().raw_ptr() };
116            accounts[slot] = MaybeUninit::new(unsafe { AccountView::new_unchecked(raw) });
117            offset += 8;
118        }
119
120        slot += 1;
121    }
122
123    while slot < frame.account_count {
124        let marker = unsafe { *input.add(offset) };
125        if marker == u8::MAX {
126            let raw = unsafe { input.add(offset) as *const RuntimeAccount };
127            let data_len = unsafe { (*raw).data_len as usize };
128            offset += RuntimeAccount::SIZE;
129            offset += data_len + MAX_PERMITTED_DATA_INCREASE;
130            offset += unsafe { input.add(offset).align_offset(BPF_ALIGN_OF_U128) };
131            offset += 8;
132        } else {
133            offset += 8;
134        }
135        slot += 1;
136    }
137
138    (frame.program_id, count, frame.instruction_data)
139}
140
141/// Fast two-argument deserialize: instruction data and program id are provided
142/// directly by the caller (from the SVM's second entrypoint register), so the
143/// full account-scan pass is skipped entirely.
144///
145/// # Safety
146///
147/// * `input` must point to a valid Solana BPF input buffer.
148/// * `ix_data` must point to the instruction data with its length stored as
149///   `u64` at offset `-8`.
150/// * `program_id` must be the correct program id for this invocation.
151#[inline(always)]
152pub unsafe fn deserialize_accounts_fast<const MAX: usize>(
153    input: *mut u8,
154    accounts: &mut [MaybeUninit<AccountView>; MAX],
155    instruction_data: &'static [u8],
156    program_id: Address,
157) -> (Address, usize, &'static [u8]) {
158    let num_accounts = unsafe { *(input as *const u64) as usize };
159    let count = num_accounts.min(MAX);
160    let mut offset = 8usize;
161
162    let mut slot = 0usize;
163    while slot < count {
164        let marker = unsafe { *input.add(offset) };
165        if marker == u8::MAX {
166            let raw = unsafe { input.add(offset) as *mut RuntimeAccount };
167            accounts[slot] = MaybeUninit::new(unsafe { AccountView::new_unchecked(raw) });
168
169            let data_len = unsafe { (*raw).data_len as usize };
170            offset += RuntimeAccount::SIZE;
171            offset += data_len + MAX_PERMITTED_DATA_INCREASE;
172            offset += unsafe { input.add(offset).align_offset(BPF_ALIGN_OF_U128) };
173            offset += 8;
174        } else {
175            let duplicate_of = marker as usize;
176            // Identical well-formedness check as the scanning-variant above.
177            if duplicate_of >= slot {
178                malformed_duplicate_marker(marker, slot);
179            }
180            let raw = unsafe { accounts[duplicate_of].assume_init_ref().raw_ptr() };
181            accounts[slot] = MaybeUninit::new(unsafe { AccountView::new_unchecked(raw) });
182            offset += 8;
183        }
184
185        slot += 1;
186    }
187
188    // Skip remaining accounts. not needed, but slot tracking isn't required
189    // since we don't need to find the instruction tail.
190
191    (program_id, count, instruction_data)
192}
193
194/// Parse just the instruction tail and account span from the loader input.
195///
196/// This supports both eager entrypoint parsing and lazy account iteration.
197/// The returned frame carries the original account span start so duplicate and
198/// canonical-account relationships remain defined at the loader level.
199///
200/// # Safety
201///
202/// `input` must point to a valid Solana BPF input buffer.
203#[inline(always)]
204pub unsafe fn scan_instruction_frame(input: *mut u8) -> RawInstructionFrame {
205    let mut scan = input;
206
207    let num_accounts = unsafe { *(scan as *const u64) as usize };
208    scan = unsafe { scan.add(8) };
209    let accounts_start = scan;
210
211    let mut slot = 0usize;
212    while slot < num_accounts {
213        let marker = unsafe { *scan };
214        if marker == u8::MAX {
215            let raw = scan as *const RuntimeAccount;
216            let data_len = unsafe { (*raw).data_len as usize };
217            let mut step = RuntimeAccount::SIZE + data_len + MAX_PERMITTED_DATA_INCREASE;
218            step += unsafe { scan.add(step).align_offset(BPF_ALIGN_OF_U128) };
219            step += 8;
220            scan = unsafe { scan.add(step) };
221        } else {
222            scan = unsafe { scan.add(8) };
223        }
224        slot += 1;
225    }
226
227    let data_len = unsafe { *(scan as *const u64) as usize };
228    scan = unsafe { scan.add(8) };
229    let instruction_data = unsafe { core::slice::from_raw_parts(scan as *const u8, data_len) };
230    scan = unsafe { scan.add(data_len) };
231
232    let program_id_ptr = scan as *const [u8; 32];
233    let program_id = Address::new_from_array(unsafe { *program_id_ptr });
234
235    RawInstructionFrame {
236        accounts_start,
237        account_count: num_accounts.min(254),
238        instruction_data,
239        program_id,
240    }
241}
242
243// =====================================================================
244// Safe bounds-checked loader-input parser (fuzz and off-chain harness).
245// =====================================================================
246//
// The primary parser above is a pure-pointer fast path: on-chain it
// consumes an SVM-loaded byte buffer whose layout is guaranteed by the
// loader. Off-chain tools (`hopper dump`, `hopper test`, fuzz harnesses,
// RPC decoders) do **not** have that guarantee: they receive arbitrary
// byte slices. Feeding one to `scan_instruction_frame` would invite
// out-of-bounds reads on any short or truncated input.
253//
254// `parse_instruction_frame_checked` is the safe companion: it walks a
255// `&[u8]` using a bounds-checked cursor and returns structured
256// `Result<FrameInfo, FrameError>`. It enforces exactly the same
257// duplicate-marker well-formedness rules (forward references are
258// rejected, not silently-aliased) and the same loader framing (88-byte
259// `RuntimeAccount` header, `MAX_PERMITTED_DATA_INCREASE` reserve, u128
260// alignment padding, `rent_epoch` tail, instruction_data with u64-LE
261// length prefix, 32-byte program id trailer).
262
/// Hard cap on accounts the safe parser will record slot offsets for.
///
/// Matches Solana's own 256-account cap per instruction. Buffers that
/// declare more than this are rejected with
/// [`FrameError::AccountCountOutOfRange`]; a declared count of exactly
/// this value is still accepted. It is also the length of
/// [`FrameInfo::slot_offsets`].
pub const MAX_SAFE_ACCOUNT_SLOTS: usize = 256;
269
270/// Summary of a safely-parsed loader input frame.
271///
272/// Only metadata is returned. the full `AccountView` construction
273/// requires the raw pointer path. This struct is what off-chain tools
274/// (and fuzz harnesses) need to verify a buffer is well-formed.
275///
276/// The `slot_offsets` array is a fixed `[usize; MAX_SAFE_ACCOUNT_SLOTS]`
277/// with the first `account_count` entries populated. Remaining entries
278/// are zero. Callers can distinguish duplicate vs canonical slots by
279/// checking whether `buffer[offset]` equals `0xFF`.
280#[derive(Clone, Debug, PartialEq, Eq)]
281pub struct FrameInfo {
282    /// Number of accounts the loader would hand to the program.
283    pub account_count: usize,
284    /// Byte range of the instruction data within the original buffer.
285    pub instruction_data_range: core::ops::Range<usize>,
286    /// Byte offset of the 32-byte program id within the original buffer.
287    pub program_id_offset: usize,
288    /// Byte offsets of each account slot, indexable 0..account_count.
289    pub slot_offsets: [usize; MAX_SAFE_ACCOUNT_SLOTS],
290}
291
/// Failure modes reported by the bounds-checked loader-input parser.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum FrameError {
    /// The buffer ran out before the frame was complete: `needed` more bytes
    /// were required starting at offset `at`.
    UnexpectedEof { needed: usize, at: usize },
    /// The declared account count is above the compiled-in cap (256).
    AccountCountOutOfRange(u64),
    /// A duplicate marker names a slot that is not strictly earlier (a
    /// forward reference or a self reference).
    MalformedDuplicateMarker { slot: usize, marker: u8 },
    /// An account's data length does not fit in what is left of the buffer.
    DataLenOutOfRange { slot: usize, data_len: u64 },
    /// Computing the next slot offset overflowed.
    OffsetOverflow { slot: usize },
}

impl core::fmt::Display for FrameError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // `FrameError` is `Copy`, so the fields can be bound by value.
        match *self {
            Self::UnexpectedEof { needed, at } => {
                write!(f, "unexpected EOF: need {needed} bytes at offset {at}")
            }
            Self::AccountCountOutOfRange(n) => write!(f, "account count {n} exceeds cap 256"),
            Self::MalformedDuplicateMarker { slot, marker } => write!(
                f,
                "malformed duplicate marker at slot {slot}: marker {marker} does not refer to an earlier slot"
            ),
            Self::DataLenOutOfRange { slot, data_len } => write!(
                f,
                "slot {slot}: data_len {data_len} exceeds remaining buffer"
            ),
            Self::OffsetOverflow { slot } => write!(f, "slot {slot}: offset arithmetic overflow"),
        }
    }
}
334
335/// Parse a loader-input byte buffer with full bounds checking.
336///
337/// This is the safe companion to `scan_instruction_frame` /
338/// `deserialize_accounts`. It returns `Err` (never panics, never reads
339/// out of bounds) for any malformed or truncated input, and preserves
340/// the exact same forward-duplicate-marker rejection rule that the
341/// pointer parser uses (see `malformed_duplicate_marker`).
342///
343/// Off-chain tools, fuzz harnesses, and RPC decoders should prefer
344/// this function. On-chain entrypoints continue to use the pointer
345/// parser for zero-overhead access.
346pub fn parse_instruction_frame_checked(buf: &[u8]) -> Result<FrameInfo, FrameError> {
347    // Helper: read a u64 LE at `pos`, bumping the cursor. Returns
348    // `UnexpectedEof` if the 8 bytes aren't in range.
349    fn read_u64_le(buf: &[u8], pos: &mut usize) -> Result<u64, FrameError> {
350        let end = pos
351            .checked_add(8)
352            .ok_or(FrameError::OffsetOverflow { slot: 0 })?;
353        let slice = buf.get(*pos..end).ok_or(FrameError::UnexpectedEof {
354            needed: 8,
355            at: *pos,
356        })?;
357        let mut bytes = [0u8; 8];
358        bytes.copy_from_slice(slice);
359        *pos = end;
360        Ok(u64::from_le_bytes(bytes))
361    }
362
363    fn read_u8(buf: &[u8], pos: &mut usize) -> Result<u8, FrameError> {
364        let byte = *buf.get(*pos).ok_or(FrameError::UnexpectedEof {
365            needed: 1,
366            at: *pos,
367        })?;
368        *pos += 1;
369        Ok(byte)
370    }
371
372    fn advance(buf: &[u8], pos: &mut usize, n: usize) -> Result<(), FrameError> {
373        let end = pos
374            .checked_add(n)
375            .ok_or(FrameError::OffsetOverflow { slot: 0 })?;
376        if end > buf.len() {
377            return Err(FrameError::UnexpectedEof {
378                needed: n,
379                at: *pos,
380            });
381        }
382        *pos = end;
383        Ok(())
384    }
385
386    let mut pos = 0usize;
387    let account_count = read_u64_le(buf, &mut pos)?;
388    if account_count > MAX_SAFE_ACCOUNT_SLOTS as u64 {
389        return Err(FrameError::AccountCountOutOfRange(account_count));
390    }
391    let account_count = account_count as usize;
392
393    let mut slot_offsets = [0usize; MAX_SAFE_ACCOUNT_SLOTS];
394
395    for slot in 0..account_count {
396        let slot_start = pos;
397        slot_offsets[slot] = slot_start;
398
399        let marker = read_u8(buf, &mut pos)?;
400        if marker == u8::MAX {
401            // Canonical account: the remaining 87 bytes of RuntimeAccount
402            // follow (we already consumed the marker byte).
403            advance(buf, &mut pos, RuntimeAccount::SIZE - 1).map_err(|_| {
404                FrameError::UnexpectedEof {
405                    needed: RuntimeAccount::SIZE - 1,
406                    at: pos,
407                }
408            })?;
409            // data_len lives at offset 80 in RuntimeAccount; we read it
410            // directly from the slot header. Offset within this slot:
411            // borrow_state(1) + flags(3) + resize_delta(4) + address(32) +
412            // owner(32) + lamports(8) = 80 -> data_len(8).
413            let data_len_pos = slot_start
414                .checked_add(80)
415                .ok_or(FrameError::OffsetOverflow { slot })?;
416            let mut dl_bytes = [0u8; 8];
417            let dl_slice =
418                buf.get(data_len_pos..data_len_pos + 8)
419                    .ok_or(FrameError::UnexpectedEof {
420                        needed: 8,
421                        at: data_len_pos,
422                    })?;
423            dl_bytes.copy_from_slice(dl_slice);
424            let data_len = u64::from_le_bytes(dl_bytes);
425
426            // data_bytes + realloc reserve + u128 alignment padding + rent_epoch
427            let data_sz: usize = (data_len as usize)
428                .checked_add(MAX_PERMITTED_DATA_INCREASE)
429                .ok_or(FrameError::DataLenOutOfRange { slot, data_len })?;
430            advance(buf, &mut pos, data_sz)
431                .map_err(|_| FrameError::DataLenOutOfRange { slot, data_len })?;
432            let pad = pos.wrapping_neg() & (BPF_ALIGN_OF_U128 - 1);
433            advance(buf, &mut pos, pad).map_err(|_| FrameError::UnexpectedEof {
434                needed: pad,
435                at: pos,
436            })?;
437            advance(buf, &mut pos, 8)
438                .map_err(|_| FrameError::UnexpectedEof { needed: 8, at: pos })?;
439        } else {
440            // Duplicate marker: must refer to a strictly earlier slot.
441            // This is the Hopper Safety Audit Must-Fix #1 invariant.
442            let duplicate_of = marker as usize;
443            if duplicate_of >= slot {
444                return Err(FrameError::MalformedDuplicateMarker { slot, marker });
445            }
446            // 7 padding bytes follow the marker.
447            advance(buf, &mut pos, 7)
448                .map_err(|_| FrameError::UnexpectedEof { needed: 7, at: pos })?;
449        }
450    }
451
452    // Instruction data: u64 LE length prefix + bytes.
453    let ix_data_len = read_u64_le(buf, &mut pos)? as usize;
454    let ix_start = pos;
455    advance(buf, &mut pos, ix_data_len).map_err(|_| FrameError::UnexpectedEof {
456        needed: ix_data_len,
457        at: pos,
458    })?;
459    let instruction_data_range = ix_start..pos;
460
461    // 32-byte program id trailer.
462    let program_id_offset = pos;
463    advance(buf, &mut pos, 32).map_err(|_| FrameError::UnexpectedEof {
464        needed: 32,
465        at: pos,
466    })?;
467
468    Ok(FrameInfo {
469        account_count,
470        instruction_data_range,
471        program_id_offset,
472        slot_offsets,
473    })
474}
475
#[cfg(test)]
mod checked_parser_tests {
    use super::*;

    /// Length of the one-canonical-account frame the tests build:
    /// 8 (account_count) + 88 (RuntimeAccount) + 10240 (realloc reserve)
    /// + 0 (offset 10336 is already aligned) + 8 (rent_epoch)
    /// + 8 (ix_data_len) + 32 (program_id) = 10384.
    const MINIMAL_FRAME_LEN: usize = 8 + 88 + MAX_PERMITTED_DATA_INCREASE + 0 + 8 + 8 + 32;

    /// Returns a well-formed frame holding one canonical account whose data
    /// is empty. Everything except the account count and the marker byte is
    /// left zeroed: header fields, realloc reserve, rent_epoch, the
    /// instruction-data length, and the program id.
    fn build_minimal_frame() -> [u8; MINIMAL_FRAME_LEN] {
        let mut frame = [0u8; MINIMAL_FRAME_LEN];
        frame[..8].copy_from_slice(&1u64.to_le_bytes()); // account_count = 1
        frame[8] = 0xFF; // marker = canonical
        frame
    }

    /// Builds a 16-byte buffer: the declared account count followed by one
    /// 8-byte duplicate record carrying `marker`.
    fn header_with_marker(account_count: u64, marker: u8) -> [u8; 16] {
        let mut bytes = [0u8; 16];
        bytes[..8].copy_from_slice(&account_count.to_le_bytes());
        bytes[8] = marker;
        bytes
    }

    #[test]
    fn parses_minimal_valid_frame() {
        let bytes = build_minimal_frame();
        let info = parse_instruction_frame_checked(&bytes).expect("well-formed");
        assert_eq!(info.account_count, 1);
        assert_eq!(info.instruction_data_range.len(), 0);
        assert_eq!(info.program_id_offset + 32, bytes.len());
    }

    #[test]
    fn truncated_header_is_rejected() {
        // Fewer than 8 bytes: there is no room for account_count.
        let result = parse_instruction_frame_checked(&[0u8; 4]);
        assert!(matches!(result, Err(FrameError::UnexpectedEof { .. })));
    }

    #[test]
    fn oversized_account_count_is_rejected() {
        let bytes = 1_000u64.to_le_bytes();
        assert_eq!(
            parse_instruction_frame_checked(&bytes),
            Err(FrameError::AccountCountOutOfRange(1000))
        );
    }

    #[test]
    fn forward_duplicate_marker_is_rejected() {
        // Two declared accounts, with slot 0 claiming to duplicate slot 1.
        // That is a forward reference and must be refused.
        let bytes = header_with_marker(2, 1);
        assert_eq!(
            parse_instruction_frame_checked(&bytes),
            Err(FrameError::MalformedDuplicateMarker { slot: 0, marker: 1 })
        );
    }

    #[test]
    fn self_duplicate_marker_is_rejected() {
        // Slot 0 with marker 0 refers to itself: forbidden.
        let bytes = header_with_marker(1, 0);
        assert_eq!(
            parse_instruction_frame_checked(&bytes),
            Err(FrameError::MalformedDuplicateMarker { slot: 0, marker: 0 })
        );
    }

    #[test]
    fn arbitrary_short_input_never_panics() {
        // Bounds-checking contract: every prefix of 0..=256 zero bytes must
        // be handled without a panic or UB.
        let zeroes = [0u8; 256];
        for len in 0..=zeroes.len() {
            let _ = parse_instruction_frame_checked(&zeroes[..len]);
        }
    }

    #[test]
    fn arbitrary_ff_input_never_panics() {
        // Same contract, with every byte set to 0xFF.
        let ones = [0xFFu8; 256];
        for len in 0..=ones.len() {
            let _ = parse_instruction_frame_checked(&ones[..len]);
        }
    }
}