// gpcas_forwardcom 0.1.1
//
// ForwardCom instruction set architecture (ISA) properties for use with the General Purpose Core
// Architecture Simulator (GPCAS).
// Documentation
// Filename: program.rs
// Author:	 Kai Rese
// Version:	 0.4
// Date:	 12-10-2022 (DD-MM-YYYY)
// Library:  gpcas_forwardcom
//
// Copyright (c) 2022 Kai Rese
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program. If not, see
// <https://www.gnu.org/licenses/>.

//! This module contains a [`Program`] struct that contains a program image. The image is ready for
//! emulation.
//!
//! Currently, it also contains a [`Segment`] struct for the ForwardCom memory virtualization.

use gpcas_isa::ELFFileHeaderFront;

/// The size in bytes of the stack region that the loader reserves in the program image.
// TODO: the ELF header carries its own `stack_size` field; it is currently not read out.
const STACK_SIZE: u64 = 0x10_0000;
/// The maximum vector size in bytes.
///
/// This is needed because the specification defines empty space after every segment, so a variable
/// length vector can be read with a guarantee of no resulting access violation. Because this is
/// hard coded into the program file, it is static for now.
const VECTOR_SIZE: u64 = 0x100; // The ForwardCom linker assumes this size.
/// Size in bytes of an empty space at the beginning of the program image. This space is used for
/// program arguments.
// TODO: open question - why is this a static size? Is the environment size not known?
const ENVIRONMENT_SIZE: u64 = 0x100;

/// This flag is set for segments of the executable file if the segment uses the instruction
/// pointer as address base.
const BASE_IP: u32 = 0x1000;
/// This flag is set for segments of the executable file if the segment uses the global data
/// pointer as address base.
const BASE_DATAP: u32 = 0x2000;
/// This flag is set for segments of the executable file if the segment uses the thread-local data
/// pointer as address base.
const BASE_THREADP: u32 = 0x4000;

/// This flag is set for segments of the executable file if the segment allocates memory in the
/// loaded program image.
const FLAG_ALLOCATE: u32 = 0x100;

/// This flag indicates execute permission for a segment of the program file.
const PERMISSION_EXECUTE: u32 = 0x1;
/// This flag indicates write permission for a segment of the program file.
const PERMISSION_WRITE: u32 = 0x2;
/// This flag indicates read permission for a segment of the program file.
const PERMISSION_READ: u32 = 0x4;

/// Contains the program image. The image is ready for emulation.
///
/// Instances are produced by [`load_forwardcom_elf`]. All addresses are offsets into [`data`],
/// which starts with the environment space, followed by the program segments and the stack.
///
/// [`data`]: Program::data
pub struct Program {
    /// The image memory data.
    pub data: Vec<u8>,
    /// The entry point address as offset into the image data. The loader computes it as the offset
    /// of the instruction pointer base segment plus the entry location from the ELF header.
    pub start_address: usize,
    /// The stack address upon program start as offset into the image data. This is the first
    /// address behind the reserved stack region.
    pub stack_address: usize,
    /// The base address of the instruction pointer. Points to the beginning of the text segment.
    pub ip_base: u64,
    /// The data pointer. Points to the beginning of the BSS segment (uninitialized data).
    pub datap: u64,
    /// The thread-local data pointer. Points to the beginning of a thread-local data segment if
    /// there is one.
    pub threadp: u64,
    /// The memory map. Contains a list of program segments, their offset if needed and their
    /// access permissions. The last entry has no permissions and marks the end of the image.
    pub memory_map: Vec<Segment>,
}

/// One entry of the memory map.
///
/// ForwardCom does not virtualize memory through pages. It keeps a map of segments of variable
/// size instead. Every segment carries the offset that translates a virtual into a physical
/// address, together with the read, write and execute permission flags.
pub struct Segment {
    /// Address at which the segment begins.
    pub address: u64,
    /// Packed value: the lowest three bits hold the permissions, all remaining bits hold the
    /// address offset.
    addend: u64,
}

impl Segment {
    /// Selects the three permission bits of the packed addend.
    const PERMISSION_BITS: u64 = 0x7;

    /// Returns the permission flags of the segment, i.e. the lowest three bits of the addend.
    pub fn flags(&self) -> u64 {
        let permissions = self.addend & Self::PERMISSION_BITS;
        permissions
    }

    /// Returns the address offset, i.e. the addend with the permission bits cleared.
    pub fn offset(&self) -> u64 {
        let without_permissions = self.addend & !Self::PERMISSION_BITS;
        without_permissions
    }
}

/// Contains the structural information needed to build the program image.
///
/// This is the result of parsing and validating the headers of an executable file; the segment
/// contents themselves stay in the original file data.
pub struct ForwardComELFFile {
    /// The ELF header of the file.
    header: ForwardComELFFileHeader,
    /// A list of all program headers in the file, in the order in which they appear in the program
    /// header table.
    program_headers: Vec<ProgramHeader>,
}

/// Contains the header of the ELF file.
///
/// The struct is `#[repr(C)]` because it is filled directly from the raw bytes at the beginning of
/// the file, so the field order and sizes have to mirror the on-disk layout.
#[derive(Debug, Default)]
#[repr(C)]
struct ForwardComELFFileHeader {
    /// The first part of the header. This is the same for all ELF ISAs, so the struct from the base
    /// crate is used.
    pub front: ELFFileHeaderFront,
    /// The entry point address.
    pub program_entry_location: u64,
    /// The offset of the program header table in bytes from the beginning of the file.
    pub program_header_table_offset: u64,
    /// The offset of the section header table in bytes from the beginning of the file.
    pub section_header_table_offset: u64,
    /// Different properties of the ELF file, for example the type.
    ///
    /// The file has to be executable to be emulated. The parser rejects files that have the lowest
    /// bit set, which marks unresolved references.
    pub flags: u32,
    /// The size of the ELF header in bytes.
    pub header_size: u16,
    /// The size of each program header in the file.
    pub program_header_table_entry_size: u16,
    /// The program header count in the file.
    pub program_header_table_entry_count: u16,
    /// The size of each section header in the file.
    pub section_header_table_entry_size: u16,
    /// The section header count in the file.
    pub section_header_table_entry_count: u32,
    /// The index in the section header table for the string table segment.
    pub section_header_string_table_index: u32,
    // The following fields are specific to ForwardCom.
    /// The maximum count of vector registers that get pushed onto the stack. Because of variable
    /// vector register size, this is needed to calculate the total stack size.
    pub stack_vector_count: u32,
    /// The size of the stack in bytes, without vectors. Can be defined exactly if the program
    /// doesn't use recursion.
    pub stack_size: u64,
    /// The base address of the instruction pointer. Points to the beginning of the text segment.
    pub ip_base: u64,
    /// The data pointer. Points to the beginning of the BSS segment (uninitialized data).
    pub datap_base: u64,
    /// The thread-local data pointer. Points to the beginning of a thread-local data segment if
    /// there is one.
    pub threadp_base: u64,
}

impl From<[u8; std::mem::size_of::<ForwardComELFFileHeader>()]> for ForwardComELFFileHeader {
    /// Reinterprets the raw header bytes as a header struct.
    ///
    /// The bytes are taken over unchanged, so the fields are read in the byte order of the host.
    fn from(raw: [u8; std::mem::size_of::<ForwardComELFFileHeader>()]) -> Self {
        // SAFETY: the array is exactly as large as the struct, which `transmute` also enforces at
        // compile time. All fields declared in this file are plain integers, for which every bit
        // pattern is valid. `ELFFileHeaderFront` is declared in another crate and is assumed to
        // consist of plain integers as well - TODO confirm.
        unsafe { std::mem::transmute::<_, Self>(raw) }
    }
}

/// Describes a program segment in the executable file.
///
/// The struct mirrors the on-disk layout of a program header table entry (56 bytes), which is why
/// it is `#[repr(C)]` and carries explicit padding.
#[derive(Debug, Default)]
#[repr(C)]
struct ProgramHeader {
    /// The type of the segment.
    pub segment_type: u32,
    /// The segment flags. The loader evaluates the address base bits, the allocate bit and the
    /// read, write and execute permission bits.
    pub flags: u32,
    /// The file offset in bytes where the content referenced by the header is stored.
    pub file_offset: u64,
    /// The virtual address of the segment. The loader treats a value of zero as the start of a new
    /// address base.
    pub virtual_address: u64,
    /// The physical address of the segment.
    pub physical_address: u64,
    /// The size of the program segment in the file.
    pub file_size: u64,
    /// The size of the program segment in the program image. Differences to the size in the file
    /// are mostly due to alignment and empty space.
    pub memory_size: u64,
    /// The alignment of the segment, stored as the exponent of a power of two.
    pub alignment: u8,
    /// Unused free space to align the struct correctly.
    _padding: [u8; 7],
}

impl From<[u8; std::mem::size_of::<ProgramHeader>()]> for ProgramHeader {
    /// Decodes a program header from the raw bytes of a program header table entry.
    ///
    /// The parser only accepts little endian files, so every field is decoded as little endian
    /// value regardless of the byte order of the host. No `unsafe` code is needed for this.
    fn from(from: [u8; std::mem::size_of::<ProgramHeader>()]) -> Self {
        // Reads a little endian `u32` that starts at the given byte offset.
        let u32_at = |start: usize| {
            let mut word = [0u8; 4];
            word.copy_from_slice(&from[start..start + 4]);
            u32::from_le_bytes(word)
        };
        // Reads a little endian `u64` that starts at the given byte offset.
        let u64_at = |start: usize| {
            let mut word = [0u8; 8];
            word.copy_from_slice(&from[start..start + 8]);
            u64::from_le_bytes(word)
        };
        // The padding is carried over unchanged, like a plain memory copy would do.
        let mut padding = [0u8; 7];
        padding.copy_from_slice(&from[49..56]);

        ProgramHeader {
            segment_type: u32_at(0),
            flags: u32_at(4),
            file_offset: u64_at(8),
            virtual_address: u64_at(16),
            physical_address: u64_at(24),
            file_size: u64_at(32),
            memory_size: u64_at(40),
            alignment: from[48],
            _padding: padding,
        }
    }
}

/// Creates a program image from an executable file.
pub fn load_forwardcom_elf(program_data: &[u8]) -> Result<Program, String> {
    let elf_data = parse_forwardcom_elf(program_data)?;

    // Calculate needed memory size
    let program_size = elf_data
        .program_headers
        .iter()
        .fold(VECTOR_SIZE + ENVIRONMENT_SIZE + STACK_SIZE, |acc, entry| {
            acc + align(entry.memory_size, entry.alignment)
        });
    let mut program_memory: Vec<u8> = vec![0; program_size as usize];

    let mut current_offset = ENVIRONMENT_SIZE;
    let mut last_flags = 0x0;
    let mut ip_base = 0x0;
    let mut datap = 0x0;
    let mut threadp = 0x0;
    let mut stack_address = 0x0;
    let mut memory_map = Vec::new();

    for program_header in elf_data.program_headers {
        current_offset = align(current_offset as u64, program_header.alignment);

        // new segment
        if program_header.virtual_address == 0 {
            // find base pointers
            match program_header.flags & (BASE_IP | BASE_DATAP | BASE_THREADP) {
                BASE_IP => ip_base = current_offset,
                BASE_DATAP => datap = current_offset,
                BASE_THREADP => threadp = current_offset,
                _ => return Err("Found program segment without address base!".to_string()),
            }

            // insert the stack
            if last_flags & BASE_IP > 0 {
                last_flags = BASE_DATAP | FLAG_ALLOCATE | PERMISSION_WRITE | PERMISSION_READ;
                memory_map.push(Segment {
                    address: current_offset,
                    addend: last_flags as u64,
                });
                current_offset = align(current_offset + STACK_SIZE, 3);
                datap = current_offset;
                stack_address = current_offset;
            }
        }

        // build memory map
        if (program_header.flags & (PERMISSION_EXECUTE | PERMISSION_WRITE | PERMISSION_READ))
            != (last_flags & (PERMISSION_EXECUTE | PERMISSION_WRITE | PERMISSION_READ))
        {
            memory_map.push(Segment {
                address: current_offset,
                addend: (program_header.flags
                    & (PERMISSION_EXECUTE | PERMISSION_WRITE | PERMISSION_READ))
                    as u64,
            })
        }

        unsafe {
            program_memory
                .as_mut_ptr()
                .add(current_offset as usize)
                .copy_from_nonoverlapping(
                    program_data
                        .as_ptr()
                        .add(program_header.file_offset as usize),
                    program_header.file_size as usize,
                );
        }
        current_offset = align(
            current_offset + program_header.memory_size,
            program_header.alignment,
        );
        last_flags = program_header.flags;
    }

    // append safety margin for vector reading
    if (last_flags & PERMISSION_READ) > 0 {
        current_offset = align(current_offset + VECTOR_SIZE, 3);
    }

    // There was no data segment, append the stack here
    if stack_address == 0 {
        last_flags = BASE_DATAP | FLAG_ALLOCATE | PERMISSION_WRITE | PERMISSION_READ;
        memory_map.push(Segment {
            address: current_offset,
            addend: last_flags as u64,
        });
        current_offset = align(current_offset + STACK_SIZE, 3);
        stack_address = current_offset;
    }

    // terminating memory map entry
    memory_map.push(Segment {
        address: current_offset,
        addend: 0x0,
    });

    Ok(Program {
        data: program_memory,
        start_address: (ip_base + elf_data.header.program_entry_location) as usize,
        stack_address: stack_address as usize,
        ip_base: ip_base + elf_data.header.ip_base,
        datap: datap + elf_data.header.datap_base,
        threadp: threadp + elf_data.header.threadp_base,
        memory_map,
    })
}

/// Aligns an address to the specified alignment.
///
/// `alignment` is the exponent of a power of two. The result is the smallest multiple of
/// `1 << alignment` that is not smaller than `base`.
///
/// The computation stays in unsigned arithmetic, so it is valid for every exponent below 64 and
/// only overflows if the aligned address itself does not fit into 64 bits.
#[inline]
fn align(base: u64, alignment: u8) -> u64 {
    // All bits below the alignment boundary.
    let mask = (1u64 << alignment) - 1;
    (base + mask) & !mask
}

/// Parses the executable file header into a [`ForwardComELFFile`] struct.
///
/// This basically just does validity tests and maps the header to the struct.
fn parse_forwardcom_elf(program_data: &[u8]) -> Result<ForwardComELFFile, String> {
    if program_data.len() < std::mem::size_of::<ForwardComELFFileHeader>() {
        Err("The header is too small to be a valid ForwardCom ELF file!".to_string())
    } else {
        let header_data: [u8; std::mem::size_of::<ForwardComELFFileHeader>()] = program_data
            [..std::mem::size_of::<ForwardComELFFileHeader>()]
            .try_into()
            .unwrap();
        let header: ForwardComELFFileHeader = header_data.into();
        let mut program_headers: Vec<ProgramHeader> = Vec::new();
        if header.front.class_id != 2 {
            Err("The file is not a 64 bit file!".to_string())
        } else if header.front.data_encoding_id != 1 {
            Err("The file is not encoded in little endian format!".to_string())
        } else if header.front.elf_version != 1 {
            Err("The file is encoded in an unsupported ELF version!".to_string())
        } else if header.front.abi_id != 250 {
            Err("The file does not use the ForwardCom ABI!".to_string())
        } else if header.front.abi_version != 1 {
            Err("The file uses an unsupported version of the ForwardCom ABI!".to_string())
        } else if header.front.file_type != 2 {
            Err("The file is not an executable!".to_string())
        } else if header.front.machine != 0x6233 {
            Err("The file is not for ForwardCom!".to_string())
        } else if (header.flags & 0x1) > 0 {
            Err("The file contains unresolved references!".to_string())
        } else if header.header_size != 104 {
            Err("The file header has the wrong size!".to_string())
        } else if header.program_header_table_entry_size as usize
            != std::mem::size_of::<ProgramHeader>()
        {
            Err("The file contains program headers of unsupported size!".to_string())
        } else {
            // valid file, yay
            let offset = header.program_header_table_offset as usize;
            for i in 0..header.program_header_table_entry_count as usize {
                let offset = offset + i * std::mem::size_of::<ProgramHeader>();
                let program_header_data: [u8; std::mem::size_of::<ProgramHeader>()] = program_data
                    [offset..offset + std::mem::size_of::<ProgramHeader>()]
                    .try_into()
                    .unwrap();
                program_headers.push(program_header_data.into());
            }
            Ok(ForwardComELFFile {
                header,
                program_headers,
            })
        }
    }
}