// pe-assembler 0.1.1
//
// PE/COFF assembler for Windows instruction sets - strongly typed, object-oriented, zero-dependency core
// Documentation
use gaia_binary::{LittleEndian, ReadBytesExt};
use gaia_types::{helpers::Architecture, GaiaError};
use serde::{Deserialize, Serialize};
use std::io::Read;

/// Header of a COFF (Common Object File Format) file.
///
/// Holds the file-level metadata: the machine the file targets, how many
/// sections it contains, its timestamp, the location and size of the symbol
/// table, the size of the optional header, and the characteristics flags.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct CoffHeader {
    /// Identifier of the target machine (x86, x64, ARM, ...).
    pub machine: u16,
    /// How many sections the file contains.
    pub number_of_sections: u16,
    /// Time at which the file was created or linked.
    pub time_date_stamp: u32,
    /// Offset of the symbol table within the file; 0 when there is none.
    pub pointer_to_symbol_table: u32,
    /// How many entries the symbol table holds.
    pub number_of_symbols: u32,
    /// Byte size of the optional header.
    pub size_of_optional_header: u16,
    /// Bit flags describing attributes of the file as a whole.
    pub characteristics: u16,
}

impl CoffHeader {
    /// Create a header for `machine` containing `number_of_sections` sections.
    ///
    /// All remaining fields start at zero; chain the `with_*` methods to set them.
    pub fn new(machine: u16, number_of_sections: u16) -> Self {
        Self {
            machine,
            number_of_sections,
            time_date_stamp: 0,
            pointer_to_symbol_table: 0,
            number_of_symbols: 0,
            size_of_optional_header: 0,
            characteristics: 0,
        }
    }

    /// Return the header with `time_date_stamp` replaced.
    pub fn with_timestamp(mut self, time_date_stamp: u32) -> Self {
        self.time_date_stamp = time_date_stamp;
        self
    }

    /// Return the header with the symbol table offset and symbol count replaced.
    pub fn with_symbol_table(mut self, pointer_to_symbol_table: u32, number_of_symbols: u32) -> Self {
        self.pointer_to_symbol_table = pointer_to_symbol_table;
        self.number_of_symbols = number_of_symbols;
        self
    }

    /// Return the header with `size_of_optional_header` replaced.
    pub fn with_optional_header_size(mut self, size_of_optional_header: u16) -> Self {
        self.size_of_optional_header = size_of_optional_header;
        self
    }

    /// Return the header with the `characteristics` flags replaced.
    pub fn with_characteristics(mut self, characteristics: u16) -> Self {
        self.characteristics = characteristics;
        self
    }

    /// Read a header from `reader`.
    ///
    /// The fields are consumed in declaration order as little-endian integers
    /// (20 bytes in total). No validation is applied to the values read.
    ///
    /// # Errors
    ///
    /// Propagates the error of the first read that fails, for example when
    /// `reader` ends before the whole header has been consumed.
    pub fn read<R: Read>(mut reader: R) -> Result<Self, GaiaError> {
        // The reads must stay in this exact order: it is the on-disk field order.
        let machine = reader.read_u16::<LittleEndian>()?;
        let number_of_sections = reader.read_u16::<LittleEndian>()?;
        let time_date_stamp = reader.read_u32::<LittleEndian>()?;
        let pointer_to_symbol_table = reader.read_u32::<LittleEndian>()?;
        let number_of_symbols = reader.read_u32::<LittleEndian>()?;
        let size_of_optional_header = reader.read_u16::<LittleEndian>()?;
        let characteristics = reader.read_u16::<LittleEndian>()?;

        Ok(Self {
            machine,
            number_of_sections,
            time_date_stamp,
            pointer_to_symbol_table,
            number_of_symbols,
            size_of_optional_header,
            characteristics,
        })
    }

    /// Map the raw `machine` value to an [`Architecture`].
    ///
    /// Machine values that are not listed below yield `Architecture::Unknown`.
    pub fn get_architecture(&self) -> Architecture {
        match self.machine {
            // IMAGE_FILE_MACHINE_I386
            0x014C => Architecture::X86,
            // IMAGE_FILE_MACHINE_AMD64
            0x8664 => Architecture::X86_64,
            // IMAGE_FILE_MACHINE_ARM, IMAGE_FILE_MACHINE_THUMB, IMAGE_FILE_MACHINE_ARMNT
            0x01C0 | 0x01C2 | 0x01C4 => Architecture::ARM32,
            // NOTE(review): the PE specification assigns 0x0200 to
            // IMAGE_FILE_MACHINE_IA64 (Itanium), not ARM. The mapping is kept so
            // files already tagged this way keep resolving as before — confirm
            // against the writers in this crate, then drop it.
            0x0200 => Architecture::ARM32,
            // IMAGE_FILE_MACHINE_ARM64
            0xAA64 => Architecture::ARM64,
            _ => Architecture::Unknown,
        }
    }
}

/// Header describing one section of a COFF file.
///
/// Only metadata lives here (name, sizes, file offsets, attribute flags);
/// the section's actual bytes are not part of this structure.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct SectionHeader {
    /// Name of the section as an 8-byte ASCII string, e.g. ".text" or ".data".
    pub name: [u8; 8],
    /// Size the section occupies once in memory.
    pub virtual_size: u32,
    /// Address (RVA) of the section in memory.
    pub virtual_address: u32,
    /// Size of the section's raw data inside the file.
    pub size_of_raw_data: u32,
    /// Offset of the section within the file.
    pub pointer_to_raw_data: u32,
    /// Offset of the section's relocation table within the file.
    pub pointer_to_relocations: u32,
    /// Offset of the section's line number table within the file.
    pub pointer_to_line_numbers: u32,
    /// How many relocation entries the section has.
    pub number_of_relocations: u16,
    /// How many line number entries the section has.
    pub number_of_line_numbers: u16,
    /// Bit flags describing the section's attributes (read, write, execute, ...).
    pub characteristics: u32,
}

impl SectionHeader {
    /// Return the section name as text with trailing NUL padding removed.
    ///
    /// The raw `name` bytes are validated as UTF-8 instead of being trusted:
    /// the field is public and may hold arbitrary bytes, so an unchecked
    /// conversion could produce an invalid `&str`. When the bytes are not
    /// valid UTF-8, the longest valid leading portion is returned. Only
    /// trailing NUL bytes are stripped; a NUL in the middle of the name is kept.
    pub fn get_name(&self) -> &str {
        let text = match std::str::from_utf8(&self.name) {
            Ok(text) => text,
            // `valid_up_to` marks the end of the well-formed prefix, so decoding
            // that prefix again cannot fail; the default only satisfies the types.
            Err(err) => std::str::from_utf8(&self.name[..err.valid_up_to()]).unwrap_or_default(),
        };
        text.trim_end_matches('\0')
    }
}

/// One entry of a COFF symbol table.
///
/// A symbol names something in the program (a function, a variable, a label
/// or another identifier) and records its value, the section it belongs to,
/// its type and its storage class.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoffSymbol {
    /// Name of the symbol; names longer than 8 bytes are kept in the string table.
    pub name: String,
    /// Value of the symbol, usually an address or an offset.
    pub value: u32,
    /// Number of the section holding the symbol: 0 = undefined, -1 = absolute, -2 = debug.
    pub section_number: i16,
    /// Basic type of the symbol.
    pub symbol_type: u16,
    /// Storage class, i.e. the symbol's scope and lifetime.
    pub storage_class: u8,
    /// How many auxiliary symbols accompany this one.
    pub number_of_aux_symbols: u8,
}

/// A single COFF relocation entry.
///
/// Describes one location whose address has to be fixed up at link time,
/// when relative addresses are turned into absolute ones.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct CoffRelocation {
    /// Virtual address of the location to relocate.
    pub virtual_address: u32,
    /// Index into the symbol table of the symbol this relocation refers to.
    pub symbol_table_index: u32,
    /// Kind of relocation, which determines how the fix-up is carried out.
    pub relocation_type: u16,
}

/// COFF section structure
///
/// Represents a section in a COFF object file, containing the section header,
/// its raw data and its relocation entries.
/// Similar to a PE section but used for object files instead of executables.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CoffSection {
    /// Section header info
    pub header: SectionHeader,
    /// Raw data of the section
    ///
    /// Omitted from the serialized form when empty. `default` lets
    /// deserialization restore an empty vector when the field is absent, so a
    /// section serialized without data can be read back instead of failing
    /// with a missing-field error.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub data: Vec<u8>,
    /// Relocation table containing all relocation entries for this section
    pub relocations: Vec<CoffRelocation>,
}

/// A complete COFF object file.
///
/// Bundles the file header, every section, the symbol table and the string
/// table. Object files are the intermediate output of a compiler: code and
/// data that have not been linked yet.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoffObject {
    /// The COFF file header.
    pub header: CoffHeader,
    /// Every section of the object file.
    pub sections: Vec<CoffSection>,
    /// The symbol table, one entry per symbol.
    pub symbols: Vec<CoffSymbol>,
    /// Raw string table, used to store long symbol names.
    pub string_table: Vec<u8>,
}

/// Header of one member inside a static library archive.
///
/// A static library is a collection of object files; each member file is
/// preceded by a header of this shape.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveMemberHeader {
    /// File name of the member.
    pub name: String,
    /// Modification time of the member file.
    pub timestamp: u32,
    /// ID of the owning user.
    pub user_id: u16,
    /// ID of the owning group.
    pub group_id: u16,
    /// Permission mode of the file.
    pub mode: u32,
    /// Size of the file.
    pub size: u32,
}

/// One member of a static library: its header plus its contents.
///
/// The contents are typically a COFF object file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArchiveMember {
    /// Header describing this member.
    pub header: ArchiveMemberHeader,
    /// Contents of the member, usually the bytes of a COFF object file.
    pub data: Vec<u8>,
    /// The COFF object parsed from the member, present only if parsing succeeded.
    pub coff_object: Option<CoffObject>,
}

/// A complete static library file (.lib).
///
/// A static library packs several object files into a single file so they
/// are easier to distribute and link against.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StaticLibrary {
    /// Signature of the library, usually "!<arch>\n".
    pub signature: String,
    /// Every member file of the library.
    pub members: Vec<ArchiveMember>,
    /// Index for fast symbol lookup, stored as `(symbol_name, member_index)` pairs.
    pub symbol_index: Vec<(String, usize)>,
}

/// Kind of COFF-related file.
///
/// Tells the different COFF-based file formats apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CoffFileType {
    /// A COFF object file (.obj).
    Object,
    /// A static library (.lib).
    StaticLibrary,
    /// A PE executable (.exe).
    Executable,
    /// A PE dynamic library (.dll).
    DynamicLibrary,
}

/// Summary information about a COFF-related file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoffInfo {
    /// Which kind of file this is.
    pub file_type: CoffFileType,
    /// Architecture the file targets.
    pub target_arch: Architecture,
    /// How many sections the file has.
    pub section_count: u16,
    /// How many symbols the file has.
    pub symbol_count: u32,
    /// Size of the file.
    pub file_size: u64,
    /// Timestamp of the file.
    pub timestamp: u32,
}