solana-optimizer 0.1.1

A tool to optimize Solana smart contract bytecode with eBPF transformations
Documentation
use solana_rbpf::ebpf;
use solana_rbpf::elf::Executable;
use solana_rbpf::program::{BuiltinProgram, SBPFVersion, FunctionRegistry};
use solana_rbpf::vm::Config;
use std::sync::Arc;
use elf::ElfBytes;
use elf::endian::{AnyEndian, EndianParse}; // 导入 EndianParse
use elf::file::Class;
use std::fs;

/// A single finding recorded by one of the [`Optimizer`] passes.
///
/// The list of issues is what `Optimizer::report` serializes to JSON.
#[derive(Debug, serde::Serialize)]
pub struct Issue {
    /// Short category tag; the passes in this file use `"LogRemoved"`, `"LoadMerged"`
    /// and `"SizeExceeded"`.
    kind: String,
    /// Position associated with the finding; each pass decides which value it stores here.
    offset: usize,
    /// Human-readable description of the finding.
    desc: String,
}

/// Holds a program's decoded `.text` instructions together with the ELF image they came from.
pub struct Optimizer {
    /// Instructions decoded from the `.text` section; the passes edit this list in place.
    insns: Vec<solana_rbpf::ebpf::Insn>,
    /// Findings accumulated by the passes.
    issues: Vec<Issue>,
    /// Raw bytes of the input ELF file exactly as read from disk.
    elf_bytes: Vec<u8>,
    /// Index of the `.text` entry within the ELF section header table.
    text_section_idx: usize,
}

impl Optimizer {
    /// Reads the ELF file at `path`, locates its `.text` section and decodes the instructions.
    ///
    /// # Errors
    /// Returns an error when the file cannot be read or parsed as ELF, when the section
    /// header table or its name string table is missing, when no section is named `.text`,
    /// or when the section's data cannot be retrieved or decoded.
    pub fn new(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
        let elf_bytes = fs::read(path)?;
        let parsed = ElfBytes::<AnyEndian>::minimal_parse(&elf_bytes)?;

        let (maybe_headers, maybe_names) = parsed.section_headers_with_strtab()?;
        let headers = match maybe_headers {
            Some(table) => table,
            None => return Err("No section headers".into()),
        };
        let names = match maybe_names {
            Some(table) => table,
            None => return Err("No string table".into()),
        };

        // Look the section up by name; a header whose name cannot be resolved never matches.
        let text_section_idx = headers
            .iter()
            .position(|hdr| matches!(names.get(hdr.sh_name as usize), Ok(".text")))
            .ok_or("No .text section")?;

        let text_header = headers
            .get(text_section_idx)
            .map_err(|_| "Invalid text section index")?;
        let (text_bytes, _) = parsed.section_data(&text_header)?;
        let insns = Self::disassemble_text_bytes(text_bytes)?;

        Ok(Self {
            insns,
            issues: Vec::new(),
            elf_bytes,
            text_section_idx,
        })
    }

    /// Decodes raw `.text` bytes into one `Insn` per 8-byte instruction slot.
    ///
    /// Slot layout (little-endian): opcode byte, a register byte holding `dst` in the low
    /// nibble and `src` in the high nibble, a 16-bit signed offset, and a 32-bit signed
    /// immediate. The immediate is sign-extended into the 64-bit `imm` field so negative
    /// constants keep their value, and `ptr` is set to the slot index.
    ///
    /// Every slot is decoded on its own, so an instruction that occupies two slots shows up
    /// as two consecutive entries.
    /// NOTE(review): `lddw` is such a two-slot instruction; confirm the passes account for it.
    ///
    /// # Errors
    /// Returns an error if `bytes` is not a whole number of 8-byte slots, because silently
    /// dropping the tail would corrupt the program when it is written back.
    fn disassemble_text_bytes(bytes: &[u8]) -> Result<Vec<solana_rbpf::ebpf::Insn>, Box<dyn std::error::Error>> {
        const INSN_SIZE: usize = 8;

        let slots = bytes.chunks_exact(INSN_SIZE);
        let trailing = slots.remainder().len();
        if trailing != 0 {
            return Err(format!(
                ".text length {} is not a multiple of {} bytes ({} trailing bytes)",
                bytes.len(),
                INSN_SIZE,
                trailing
            )
            .into());
        }

        let insns = slots
            .enumerate()
            .map(|(idx, slot)| ebpf::Insn {
                ptr: idx,
                opc: slot[0],
                dst: slot[1] & 0x0F,
                src: (slot[1] >> 4) & 0x0F,
                off: i16::from_le_bytes([slot[2], slot[3]]),
                // The encoded immediate is a signed 32-bit value; widen it with sign extension.
                imm: i64::from(i32::from_le_bytes([slot[4], slot[5], slot[6], slot[7]])),
            })
            .collect();
        Ok(insns)
    }

    pub fn remove_logs(&mut self) {
        let original_len = self.insns.len();
        self.insns.retain(|insn| {
            if insn.opc == 0x91 { // 假设 sol_log opcode
                self.issues.push(Issue {
                    kind: "LogRemoved".to_string(),
                    offset: insn.off as usize,
                    desc: "Removed redundant sol_log call".to_string(),
                });
                false
            } else {
                true
            }
        });
        println!("Removed {} log instructions", original_len - self.insns.len());
    }

    /// Removes the second of two adjacent `LD_DW_IMM` instructions that share the same `src`
    /// field, recording a `LoadMerged` issue whose `offset` is the index of the kept one.
    ///
    /// After a removal the kept instruction is compared against its new neighbour, so a run
    /// of matching loads collapses to a single instruction. Lists with fewer than two
    /// instructions are left untouched.
    ///
    /// NOTE(review): the comparison looks only at `src`, not at `dst` or `imm`, so loads of
    /// different constants are treated as duplicates; confirm this is intended. Jump offsets
    /// are not adjusted after a removal.
    pub fn merge_loads(&mut self) {
        let mut i = 0;
        // `i + 1 < len` instead of `i < len - 1`: the latter underflows on an empty list.
        while i + 1 < self.insns.len() {
            let (first, second) = (&self.insns[i], &self.insns[i + 1]);
            let duplicate = first.opc == ebpf::LD_DW_IMM
                && second.opc == ebpf::LD_DW_IMM
                && first.src == second.src;

            if duplicate {
                self.insns.remove(i + 1);
                self.issues.push(Issue {
                    kind: "LoadMerged".to_string(),
                    offset: i,
                    desc: "Merged duplicate load instruction".to_string(),
                });
                // Stay on `i` so it is compared with whatever moved into `i + 1`.
                continue;
            }
            i += 1;
        }
        println!("Merged duplicate load instructions");
    }

    /// Records a `SizeExceeded` issue when the instruction stream (8 bytes per instruction)
    /// is larger than 128 KiB; otherwise does nothing.
    pub fn check_size(&mut self) {
        const INSN_BYTES: usize = 8;
        const LIMIT_BYTES: usize = 128 * 1024;

        let size = self.insns.len() * INSN_BYTES;
        if size <= LIMIT_BYTES {
            return;
        }

        let desc = format!("Program size {} bytes exceeds 128KB", size);
        self.issues.push(Issue {
            kind: String::from("SizeExceeded"),
            offset: 0,
            desc,
        });
    }

    pub fn generate(&self) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        use solana_rbpf::vm::TestContextObject;

        let loader = Arc::new(BuiltinProgram::<TestContextObject>::new_loader(
            Config::default(),
            FunctionRegistry::default(),
        ));

        let executable = Executable::from_text_bytes(
            &self.insns.iter().flat_map(|insn| {
                let imm_bytes = insn.imm.to_le_bytes();
                [
                    insn.opc,
                    (insn.dst & 0x0F) | ((insn.src & 0x0F) << 4),
                    insn.off.to_le_bytes()[0],
                    insn.off.to_le_bytes()[1],
                    imm_bytes[0],
                    imm_bytes[1],
                    imm_bytes[2],
                    imm_bytes[3],
                ]
            }).collect::<Vec<u8>>(),
            loader,
            SBPFVersion::V2,
            FunctionRegistry::default(),
        )?;
        let optimized_text = executable.get_text_bytes().1.to_vec();
        let elf = ElfBytes::<AnyEndian>::minimal_parse(&self.elf_bytes)?;
        let ehdr = elf.ehdr;
        let (shdrs_opt, _) = elf.section_headers_with_strtab()?;
        let shdrs = shdrs_opt.ok_or("No section headers")?;
        let mut new_shdrs: Vec<_> = shdrs.iter().collect();
        let mut elf_bytes = Vec::new();

        // 写入 ELF 头部
        let class_val = match ehdr.class {
            Class::ELF32 => elf::abi::ELFCLASS32,
            Class::ELF64 => elf::abi::ELFCLASS64,
            _ => elf::abi::ELFCLASS32,
        };
        // 修改第 148 行:使用 EndianParse 的 is_little 方法
        let endian_val = if ehdr.endianness.is_little() {
            elf::abi::ELFDATA2LSB
        } else {
            elf::abi::ELFDATA2MSB
        };
        let mut ehdr_bytes = Vec::new();
        ehdr_bytes.extend_from_slice(&[0x7f, b'E', b'L', b'F']); // ei_magic,4 bytes
        ehdr_bytes.extend_from_slice(&[class_val, endian_val, ehdr.version.try_into()?, ehdr.osabi]); // 4 bytes
        ehdr_bytes.extend_from_slice(&[ehdr.abiversion, 0, 0, 0, 0, 0, 0, 0]); // EI_PAD,8 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_type.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_machine.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.version.to_le_bytes()); // 4 bytes
        ehdr_bytes.extend_from_slice(&(ehdr.e_entry as u32).to_le_bytes()); // 4 bytes
        ehdr_bytes.extend_from_slice(&(ehdr.e_phoff as u32).to_le_bytes()); // 4 bytes
        ehdr_bytes.extend_from_slice(&(ehdr.e_shoff as u32).to_le_bytes()); // 4 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_flags.to_le_bytes()); // 4 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_ehsize.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_phentsize.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_phnum.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_shentsize.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_shnum.to_le_bytes()); // 2 bytes
        ehdr_bytes.extend_from_slice(&ehdr.e_shstrndx.to_le_bytes()); // 2 bytes
        elf_bytes.extend_from_slice(&ehdr_bytes);

        // 计算段数据起始偏移
        let mut offset = ehdr.e_ehsize as usize;
        let mut section_data = Vec::new();

        // 修改第 185、186 行:使用 iter_mut 避免借用冲突
        for (i, sh) in new_shdrs.iter_mut().enumerate() {
            let data = if i == self.text_section_idx {
                optimized_text.clone()
            } else {
                elf.section_data(sh)?.0.to_vec()
            };
            section_data.push((sh.sh_offset, data.clone()));
            sh.sh_offset = offset as u64; // 直接修改可变引用
            sh.sh_size = data.len() as u64; // 直接修改可变引用
            offset += data.len();
            offset = (offset + 7) & !7; // 8 字节对齐
        }

        // 更新段表偏移
        ehdr_bytes[32..36].copy_from_slice(&(offset as u32).to_le_bytes()); // e_shoff,4 bytes
        elf_bytes[0..52].copy_from_slice(&ehdr_bytes); // 固定为 52 字节

        // 写入段数据
        for (_, data) in section_data.iter() {
            elf_bytes.extend_from_slice(data);
            let padding = (8 - (data.len() % 8)) % 8;
            elf_bytes.extend_from_slice(&vec![0; padding]);
        }

        // 写入段表
        for sh in new_shdrs.iter() {
            let sh_bytes = [
                sh.sh_name.to_le_bytes(), // 4 bytes
                sh.sh_type.to_le_bytes(), // 4 bytes
                (sh.sh_flags as u32).to_le_bytes(), // 4 bytes
                (sh.sh_addr as u32).to_le_bytes(), // 4 bytes
                (sh.sh_offset as u32).to_le_bytes(), // 4 bytes
                (sh.sh_size as u32).to_le_bytes(), // 4 bytes
                sh.sh_link.to_le_bytes(), // 4 bytes
                sh.sh_info.to_le_bytes(), // 4 bytes
                (sh.sh_addralign as u32).to_le_bytes(), // 4 bytes
                (sh.sh_entsize as u32).to_le_bytes(), // 4 bytes
            ].concat();
            elf_bytes.extend_from_slice(&sh_bytes);
        }

        Ok(elf_bytes)
    }

    /// Renders the recorded issues as pretty-printed JSON, falling back to `"[]"` if
    /// serialization fails.
    pub fn report(&self) -> String {
        match serde_json::to_string_pretty(&self.issues) {
            Ok(json) => json,
            Err(_) => String::from("[]"),
        }
    }
}

/// Loads the program at `input_path`, runs the log-removal, load-merging and size-check
/// passes in that order, writes the regenerated ELF to `output_path`, and returns the
/// JSON issue report.
///
/// # Errors
/// Propagates any failure from loading the input, regenerating the ELF, or writing the output.
pub fn optimize_sbf(input_path: &str, output_path: &str) -> Result<String, Box<dyn std::error::Error>> {
    let mut opt = Optimizer::new(input_path)?;

    opt.remove_logs();
    opt.merge_loads();
    opt.check_size();

    let rebuilt = opt.generate()?;
    fs::write(output_path, rebuilt)?;

    let summary = opt.report();
    Ok(summary)
}