use elf::{
abi::{EM_RISCV, ET_EXEC, PF_W, PF_X, PT_LOAD, PT_NOTE},
endian::LittleEndian,
file::Class,
segment::ProgramHeader,
ElfBytes,
};
use eyre::OptionExt;
use hashbrown::HashMap;
use sp1_primitives::consts::{
INSTRUCTION_WORD_SIZE, MAXIMUM_MEMORY_SIZE, NOTE_UNTRUSTED_PROGRAM_ENABLED, PAGE_SIZE,
STACK_TOP,
};
use std::sync::Arc;
/// Decoded view of a RISC-V ELF executable, as produced by [`Elf::decode`].
#[derive(Debug, Clone)]
pub(crate) struct Elf {
/// Instruction words read from the file-backed part of the executable (`PF_X`) load
/// segments, in the order the segments were processed.
pub(crate) instructions: Vec<u32>,
/// Entry point; `decode` fills this from the ELF header's `e_entry`.
pub(crate) pc_start: u64,
/// Virtual address of the first executable load segment seen by `decode`.
pub(crate) pc_base: u64,
/// Maps a page index (address divided by `PAGE_SIZE`) to the `p_flags` of the load
/// segment covering that page, narrowed to `u8`.
pub(crate) page_prot_image: HashMap<u64, u8>,
/// Set when a `PT_NOTE` segment carries the `SUCCINCT` note of type
/// `NOTE_UNTRUSTED_PROGRAM_ENABLED` whose descriptor is `"1"`.
pub(crate) enable_untrusted_programs: bool,
/// Initial memory contents keyed by 8-byte-aligned address. Each value packs two
/// 32-bit words: the word at the key in the low half, the word at key + 4 in the high half.
pub(crate) memory_image: Arc<HashMap<u64, u64>>,
/// `(demangled name, start address, size)` for each function symbol. `decode` leaves this
/// empty unless the `profiling` feature is enabled.
pub(crate) function_symbols: Vec<(String, u64, u64)>,
}
impl Elf {
/// Assembles an [`Elf`] from already-decoded parts.
///
/// The memory image is moved behind an [`Arc`]; every other argument is stored as given.
#[must_use]
pub(crate) fn new(
    instructions: Vec<u32>,
    pc_start: u64,
    pc_base: u64,
    memory_image: HashMap<u64, u64>,
    page_prot_image: HashMap<u64, u8>,
    enable_untrusted_programs: bool,
    function_symbols: Vec<(String, u64, u64)>,
) -> Self {
    // Wrap once here so callers hand over a plain map.
    let shared_image = Arc::new(memory_image);
    Self {
        instructions,
        pc_start,
        pc_base,
        page_prot_image,
        enable_untrusted_programs,
        memory_image: shared_image,
        function_symbols,
    }
}
#[allow(clippy::too_many_lines)]
pub(crate) fn decode(input: &[u8]) -> eyre::Result<Self> {
let mut image: HashMap<u64, u64> = HashMap::new();
let mut page_prot_image = HashMap::new();
let elf = ElfBytes::<LittleEndian>::minimal_parse(input)?;
if elf.ehdr.class != Class::ELF32 && elf.ehdr.class != Class::ELF64 {
eyre::bail!("must be a 32-bit or 64-bit elf");
} else if elf.ehdr.e_machine != EM_RISCV {
eyre::bail!("must be a riscv machine");
} else if elf.ehdr.e_type != ET_EXEC {
eyre::bail!("must be executable");
}
let entry = elf.ehdr.e_entry;
if entry == MAXIMUM_MEMORY_SIZE || !entry.is_multiple_of(4) {
eyre::bail!("invalid entrypoint, entry: {}", entry);
}
let segments = elf.segments().ok_or_else(|| eyre::eyre!("failed to get segments"))?;
if segments.len() > 256 {
eyre::bail!("too many program headers");
}
let mut instructions: Vec<u32> = Vec::new();
let mut base_address = None;
let mut prev_segment_end_addr = None;
let mut enable_untrusted_programs = false;
for segment in segments.iter() {
if segment.p_type == PT_LOAD {
prev_segment_end_addr = Self::process_load_segment(
&segment,
input,
&mut instructions,
&mut base_address,
&mut image,
&mut page_prot_image,
prev_segment_end_addr,
)?;
}
if (segment.p_type == PT_NOTE) && !enable_untrusted_programs {
enable_untrusted_programs = Self::process_note_segment(&segment, input)?;
}
}
if base_address.is_none() {
eyre::bail!("no executable (PF_X) segments found");
}
if instructions.is_empty() || instructions.len() > (1 << 22) {
eyre::bail!("invalid number of instructions");
}
#[cfg(not(feature = "profiling"))]
let function_symbols = Vec::new();
#[cfg(feature = "profiling")]
let function_symbols =
elf.symbol_table()?.map_or_else(Vec::new, |(symbol_table, string_table)| {
symbol_table
.iter()
.filter(|sym| sym.st_symtype() == elf::abi::STT_FUNC)
.map(|sym| {
let name = string_table.get(sym.st_name as usize).unwrap_or("");
let demangled_name = rustc_demangle::demangle(name).to_string();
let size = sym.st_size;
let start_address = sym.st_value;
(demangled_name, start_address, size)
})
.collect()
});
Ok(Elf::new(
instructions,
entry,
base_address.unwrap(),
image,
page_prot_image,
enable_untrusted_programs,
function_symbols,
))
}
/// Loads one `PT_LOAD` segment into the memory image and the page-protection map, and,
/// for executable segments, appends its words to the instruction stream.
///
/// * `segment` - program header of the segment being loaded.
/// * `input` - the raw ELF file the segment's bytes are read from.
/// * `instructions` - receives one `u32` per file-backed word of an executable segment.
/// * `base_address` - set to this segment's `p_vaddr` if it is the first executable segment.
/// * `image` - memory image keyed by 8-byte-aligned address; two 32-bit words per entry.
/// * `page_prot_image` - receives `p_flags` (as `u8`) for every page the segment touches.
/// * `prev_segment_end_addr` - end address returned for the previous load segment, if any.
///
/// Returns the end address of this segment (`p_vaddr + p_memsz`).
///
/// # Errors
///
/// Fails when the segment has an invalid size, lies below `STACK_TOP`, is both writable and
/// executable, starts before the previous segment ends, is misaligned, overflows the address
/// space, is an executable segment that is not contiguous with the instructions loaded so
/// far, has flags that do not fit in a `u8`, or refers to bytes outside of `input`.
fn process_load_segment(
    segment: &ProgramHeader,
    input: &[u8],
    instructions: &mut Vec<u32>,
    base_address: &mut Option<u64>,
    image: &mut HashMap<u64, u64>,
    page_prot_image: &mut HashMap<u64, u8>,
    prev_segment_end_addr: Option<u64>,
) -> eyre::Result<Option<u64>> {
    let file_size = segment.p_filesz;
    if file_size == MAXIMUM_MEMORY_SIZE {
        eyre::bail!("invalid segment file_size");
    }
    let mem_size = segment.p_memsz;
    if mem_size == MAXIMUM_MEMORY_SIZE {
        eyre::bail!("Invalid segment mem_size");
    }

    let vaddr = segment.p_vaddr;
    let offset = segment.p_offset;
    let is_execute = (segment.p_flags & PF_X) != 0;

    if vaddr < STACK_TOP {
        eyre::bail!("ELF has a segment that is below the STACK_TOP");
    }
    if is_execute && (segment.p_flags & PF_W) != 0 {
        eyre::bail!("ELF has a segment that is both writable and executable");
    }

    let step_size = INSTRUCTION_WORD_SIZE;
    if let Some(prev_last_addr) = prev_segment_end_addr {
        eyre::ensure!(prev_last_addr <= vaddr, "unsupported elf structure");
    }
    let end = vaddr
        .checked_add(mem_size)
        .ok_or_else(|| eyre::eyre!("address overflow in segment"))?;
    if !vaddr.is_multiple_of(step_size as u64) {
        eyre::bail!("segment vaddr is not aligned");
    }

    if is_execute {
        let current_base = *base_address;
        match current_base {
            // First executable segment: it defines the base address. The base is only
            // recorded after the sanity check so the check cannot be bypassed.
            None => {
                eyre::ensure!(
                    vaddr > 0x20,
                    "base address {} should be greater than 0x20",
                    vaddr
                );
                *base_address = Some(vaddr);
            }
            // Later executable segments must start exactly where the instructions loaded
            // so far end, so `instructions` stays a contiguous view starting at the base.
            Some(base) => {
                let instr_len: u64 = INSTRUCTION_WORD_SIZE
                    .checked_mul(instructions.len())
                    .ok_or_eyre("instructions length overflow")?
                    .try_into()?;
                let last_instruction_addr =
                    base.checked_add(instr_len).ok_or_eyre("instruction addr overflow")?;
                eyre::ensure!(vaddr == last_instruction_addr, "unsupported elf structure");
            }
        }
    }

    // `p_flags` comes straight from the file; reject values that cannot be stored instead
    // of panicking, and do so before any output map is modified.
    let page_prot = u8::try_from(segment.p_flags).map_err(|_| {
        eyre::eyre!(
            "segment flags {:#x} do not fit in a page protection byte",
            segment.p_flags
        )
    })?;

    for addr in (vaddr..end).step_by(step_size) {
        // Offset of this word inside the segment; comparing offsets avoids computing
        // `vaddr + file_size`, which can overflow for a hostile `p_filesz`.
        let seg_offset = addr - vaddr;
        let slot = addr - addr % 8;

        if seg_offset >= file_size {
            // Memory beyond the file-backed part is zero. Only create the slot when it
            // is missing: the other half of the slot may already hold loaded data.
            image.entry(slot).or_insert(0);
            continue;
        }

        // The last file-backed word may be shorter than a full word.
        let remaining = file_size - seg_offset;
        let len = usize::try_from(remaining).map_or(step_size, |r| r.min(step_size));
        let bytes = offset
            .checked_add(seg_offset)
            .and_then(|start| usize::try_from(start).ok())
            .and_then(|start| Some(start..start.checked_add(len)?))
            .and_then(|range| input.get(range))
            .ok_or_else(|| eyre::eyre!("failed to read segment offset"))?;

        // Assemble the bytes as a little-endian word.
        let word = bytes
            .iter()
            .enumerate()
            .fold(0u64, |acc, (i, &byte)| acc | (u64::from(byte) << (8 * i)));

        // `vaddr` is word-aligned and we step by one word, so `addr` is either the low
        // or the high 32-bit half of its 8-byte slot.
        let half = if addr == slot {
            word
        } else {
            assert!(addr % 8 == 4);
            word << 32
        };
        *image.entry(slot).or_insert(0) += half;

        if is_execute {
            // Deliberate narrowing: an instruction is the low 32 bits of the word read.
            instructions.push(word as u32);
        }
    }

    // Record the segment's protection for every page it overlaps, starting from the page
    // that contains `vaddr`.
    let page_size = PAGE_SIZE as u64;
    let first_page_addr = vaddr - vaddr % page_size;
    for page_addr in (first_page_addr..end).step_by(PAGE_SIZE) {
        page_prot_image.insert(page_addr / page_size, page_prot);
    }

    Ok(Some(end))
}
fn process_note_segment(segment: &ProgramHeader, input: &[u8]) -> eyre::Result<bool> {
let note_segment_offset: usize = segment.p_offset.try_into()?;
let note_segment_size: usize = segment.p_filesz.try_into()?;
let note_segment =
input[note_segment_offset..note_segment_offset + note_segment_size].to_vec();
let mut note_offset: usize = 0;
while note_offset < note_segment_size {
let name_size: usize =
u32::from_le_bytes(note_segment[note_offset..note_offset + 4].try_into()?)
.try_into()?;
note_offset += 4;
let desc_size: usize =
u32::from_le_bytes(note_segment[note_offset..note_offset + 4].try_into()?)
.try_into()?;
note_offset += 4;
let note_type: u32 =
u32::from_le_bytes(note_segment[note_offset..note_offset + 4].try_into()?);
note_offset += 4;
let name =
String::from_utf8(note_segment[note_offset..note_offset + name_size].to_vec())?;
note_offset += name_size + (name_size % 4);
let desc =
String::from_utf8(note_segment[note_offset..note_offset + desc_size].to_vec())?;
note_offset += desc_size + (desc_size % 4);
if name == "SUCCINCT" && note_type == NOTE_UNTRUSTED_PROGRAM_ENABLED && desc == "1" {
return Ok(true);
}
}
Ok(false)
}
}