pe-assembler 0.1.1

PE/COFF assembler for Windows instruction sets - strongly typed, object-oriented, zero-dependency core
Documentation
use crate::{
    helpers::CoffReader,
    types::{
        coff::{ArchiveMember, ArchiveMemberHeader, CoffFileType, CoffInfo, StaticLibrary},
        CoffHeader, CoffObject, SectionHeader,
    },
};
use gaia_binary::ReadBytesExt;
use gaia_types::{
    helpers::{Architecture, Url},
    GaiaDiagnostics, GaiaError,
};
use std::io::{Read, Seek};

/// LIB structure, lazy reader
///
/// Wraps a reader `R` and parses the static library stored in it on demand.
/// Parsed results are cached in the `lazy_*` fields so repeated queries do not
/// re-read the stream.
#[derive(Debug)]
pub struct LibReader<R> {
    /// Underlying byte source the archive is read from.
    reader: R,
    /// Optional URL attached through `with_url`.
    url: Option<Url>,
    /// Parsed library, filled in by the first successful `read_library` call.
    lazy_library: Option<StaticLibrary>,
    /// Summary information, filled in on first request or set via `set_coff_info`.
    lazy_info: Option<CoffInfo>,
    /// Diagnostics collected through `add_diagnostics`; handed back by `finish`.
    errors: Vec<GaiaError>,
}

impl<R> LibReader<R> {
    /// Wrap `reader` in a fresh `LibReader` with no URL, no cached data and
    /// an empty diagnostics list.
    pub fn new(reader: R) -> Self {
        Self { reader, url: None, lazy_library: None, lazy_info: None, errors: Vec::new() }
    }

    /// Builder-style setter that attaches `url` to the reader.
    pub fn with_url(self, url: Url) -> Self {
        Self { url: Some(url), ..self }
    }

    /// Consume the reader and return the parsed library together with every
    /// diagnostic collected so far.
    ///
    /// A library that was already parsed is returned from the cache; otherwise
    /// the stream is parsed now, and a parse failure becomes the `Err` result.
    pub fn finish(mut self) -> GaiaDiagnostics<StaticLibrary>
    where
        R: Read + Seek,
    {
        let result = match self.lazy_library.take() {
            Some(cached) => Ok(cached),
            None => self.read_library_force(),
        };
        GaiaDiagnostics { result, diagnostics: self.errors }
    }
}

impl<R: Read + Seek> CoffReader<R> for LibReader<R> {
    fn get_viewer(&mut self) -> &mut R {
        &mut self.reader
    }

    fn add_diagnostics(&mut self, error: impl Into<GaiaError>) {
        self.errors.push(error.into())
    }

    fn get_coff_header(&mut self) -> Result<&CoffHeader, GaiaError> {
        Err(GaiaError::not_implemented("LibReader does not support direct reading of COFF headers, please use member objects"))
    }

    fn set_coff_header(&mut self, _head: CoffHeader) -> Option<CoffHeader> {
        None // LibReader does not support setting COFF headers
    }

    fn get_section_headers(&mut self) -> Result<&[SectionHeader], GaiaError> {
        Err(GaiaError::not_implemented(
            "LibReader does not support direct reading of section headers, please use member objects",
        ))
    }

    fn set_section_headers(&mut self, _headers: Vec<SectionHeader>) -> Vec<SectionHeader> {
        Vec::new() // LibReader does not support setting section headers
    }

    fn get_coff_object(&mut self) -> Result<&CoffObject, GaiaError> {
        Err(GaiaError::not_implemented("LibReader does not support direct reading of COFF objects, please use member objects"))
    }

    fn set_coff_object(&mut self, _object: CoffObject) -> Option<CoffObject> {
        None // LibReader does not support setting COFF objects
    }

    fn get_coff_info(&mut self) -> Result<&CoffInfo, GaiaError> {
        if self.lazy_info.is_none() {
            let info = self.create_lib_info()?;
            self.lazy_info = Some(info);
        }
        Ok(self.lazy_info.as_ref().unwrap())
    }

    fn set_coff_info(&mut self, info: CoffInfo) -> Option<CoffInfo> {
        self.lazy_info.replace(info)
    }
}

impl<R: Read + Seek> LibReader<R> {
    /// Check if it is a valid static library file
    ///
    /// The first 8 bytes of the stream are compared with the archive
    /// signature `!<arch>\n`. The check always starts at offset 0, whatever
    /// the reader's current position is, and leaves the reader at offset 0.
    ///
    /// # Returns
    /// `Ok(true)` when the signature matches; `Ok(false)` when it does not or
    /// when the stream holds fewer than 8 bytes.
    ///
    /// # Errors
    /// Seek failures, and read failures other than reaching the end of the
    /// stream early.
    pub fn is_valid_lib(&mut self) -> Result<bool, GaiaError> {
        const ARCHIVE_MAGIC: &[u8; 8] = b"!<arch>\n";

        // Rewind before reading: the signature lives at the very start, and the
        // reader may already have been advanced by earlier parsing.
        self.reader.seek(std::io::SeekFrom::Start(0))?;

        let mut magic = [0u8; 8];
        let read_outcome = self.reader.read_exact(&mut magic);

        // Leave the stream at the start regardless of how the read went.
        self.reader.seek(std::io::SeekFrom::Start(0))?;

        match read_outcome {
            Ok(()) => Ok(&magic == ARCHIVE_MAGIC),
            // A stream too short to hold the signature is simply not a library.
            Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => Ok(false),
            Err(e) => Err(e.into()),
        }
    }

    /// View static library file information
    pub fn view(&mut self) -> Result<CoffInfo, GaiaError> {
        if let Some(ref info) = self.lazy_info {
            return Ok(info.clone());
        }

        let info = self.create_lib_info()?;
        self.lazy_info = Some(info.clone());
        Ok(info)
    }

    /// Read static library
    ///
    /// Parses the stream on the first call and caches the result; every later
    /// call returns a reference to the cached library. A parse failure is
    /// propagated and nothing is cached.
    pub fn read_library(&mut self) -> Result<&StaticLibrary, GaiaError> {
        if self.lazy_library.is_none() {
            let parsed = self.read_library_force()?;
            self.lazy_library = Some(parsed);
        }
        match &self.lazy_library {
            Some(library) => Ok(library),
            // The cache was populated just above.
            None => unreachable!(),
        }
    }

    /// Force read static library (without cache)
    ///
    /// Validates the archive signature, then reads member after member until
    /// fewer bytes than one member header remain. Members named `/` or starting
    /// with `/<ECSYMBOLS>` are additionally handed to `parse_symbol_table`, and
    /// the resulting symbols are appended to the library's symbol index. All
    /// members, including symbol tables and the `//` extended name table, are
    /// kept in `members` in file order.
    ///
    /// # Errors
    /// Fails when the signature is wrong or when seeking / querying the stream
    /// position fails. A member that cannot be read does NOT fail the call:
    /// the error is recorded as a diagnostic, reading stops, and the members
    /// read so far are returned.
    fn read_library_force(&mut self) -> Result<StaticLibrary, GaiaError> {
        // Every archive member starts with a fixed 60-byte header.
        const MEMBER_HEADER_SIZE: u64 = 60;

        if !self.is_valid_lib()? {
            return Err(GaiaError::invalid_data("Not a valid static library file"));
        }

        // Skip file header "!<arch>\n" (8 bytes)
        self.reader.seek(std::io::SeekFrom::Start(8))?;

        let mut members = Vec::new();
        let mut symbol_index = Vec::new();
        let file_size = self.get_file_size()?;

        // Keep going only while a complete member header still fits in the file.
        while self.get_position()? + MEMBER_HEADER_SIZE <= file_size {
            let member = match self.read_member() {
                Ok(member) => member,
                Err(e) => {
                    // After a failed read the stream offset no longer points at a
                    // member boundary, so record the problem and stop here.
                    self.add_diagnostics(e);
                    break;
                }
            };

            // Symbol tables: traditional "/" and the "/<ECSYMBOLS>" variant.
            let is_symbol_table = member.header.name == "/" || member.header.name.starts_with("/<ECSYMBOLS>");
            if is_symbol_table {
                match self.parse_symbol_table(&member.data, members.len()) {
                    Ok(symbols) => symbol_index.extend(symbols),
                    // A broken symbol table should not lose the members themselves.
                    Err(e) => self.add_diagnostics(e),
                }
            }

            members.push(member);
        }

        Ok(StaticLibrary { signature: "!<arch>\n".to_string(), members, symbol_index })
    }

    /// Create library info
    ///
    /// Builds the summary for the library: the file type is always
    /// `StaticLibrary`, the symbol count is the length of the library's symbol
    /// index, and the size is the length of the stream. Architecture, section
    /// count and timestamp are filled with `Unknown` / `0`.
    fn create_lib_info(&mut self) -> Result<CoffInfo, GaiaError> {
        // Measure the stream first, then make sure the library is parsed.
        let file_size = self.get_file_size()?;
        let symbol_count = self.read_library()?.symbol_index.len() as u32;

        let info = CoffInfo {
            file_type: CoffFileType::StaticLibrary,
            target_arch: Architecture::Unknown,
            section_count: 0,
            symbol_count,
            file_size,
            timestamp: 0,
        };
        Ok(info)
    }

    /// Get file size
    ///
    /// Seeks to the end of the stream to learn its length in bytes. The
    /// position reported by `get_position` beforehand is handed back to
    /// `set_position` afterwards.
    pub fn get_file_size(&mut self) -> Result<u64, GaiaError> {
        use std::io::SeekFrom;

        let saved_position = self.get_position()?;
        // Seeking to the end returns the new offset, i.e. the stream length.
        let total_len = self.reader.seek(SeekFrom::End(0))?;
        self.set_position(saved_position)?;
        Ok(total_len)
    }

    /// Read member
    fn read_member(&mut self) -> Result<ArchiveMember, GaiaError> {
        let header = self.read_member_header()?;
        let mut data = vec![0u8; header.size as usize];
        self.reader.read_exact(&mut data)?;

        // Align to even boundaries
        if header.size % 2 == 1 {
            self.reader.read_u8()?;
        }

        // Attempt to parse COFF object (if data is in valid COFF format)
        let coff_object = if data.len() > 20 {
            // Attempt to read COFF object from data
            // Return None for now as a specific CoffReader implementation is needed
            // TODO: Implement a simple COFF object parser
            None
        }
        else {
            None
        };

        Ok(ArchiveMember { header, data, coff_object })
    }

    /// Parse symbol table
    fn parse_symbol_table(&self, data: &[u8], member_index: usize) -> Result<Vec<(String, usize)>, GaiaError> {
        let mut symbols = Vec::new();

        if data.len() < 4 {
            return Ok(symbols);
        }

        // Read symbol count (first 4 bytes, big-endian)
        let symbol_count = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;

        if symbol_count == 0 || symbol_count > 100000 {
            // Unreasonable symbol count, might not be a standard symbol table format
            return Ok(symbols);
        }

        // Skip symbol offset table (4 bytes offset per symbol)
        let string_table_start = 4 + symbol_count * 4;

        if string_table_start >= data.len() {
            return Ok(symbols);
        }

        // Parse string table
        let string_data = &data[string_table_start..];
        let mut current_pos = 0;

        while current_pos < string_data.len() && symbols.len() < symbol_count {
            // Find the next null terminator
            if let Some(null_pos) = string_data[current_pos..].iter().position(|&b| b == 0) {
                if null_pos > 0 {
                    if let Ok(symbol_name) = std::str::from_utf8(&string_data[current_pos..current_pos + null_pos]) {
                        symbols.push((symbol_name.to_string(), member_index));
                    }
                }
                current_pos += null_pos + 1;
            }
            else {
                break;
            }
        }

        Ok(symbols)
    }

    /// Read member header
    fn read_member_header(&mut self) -> Result<ArchiveMemberHeader, GaiaError> {
        let mut name = [0u8; 16];
        self.reader.read_exact(&mut name)?;

        let mut date = [0u8; 12];
        self.reader.read_exact(&mut date)?;

        let mut uid = [0u8; 6];
        self.reader.read_exact(&mut uid)?;

        let mut gid = [0u8; 6];
        self.reader.read_exact(&mut gid)?;

        let mut mode = [0u8; 8];
        self.reader.read_exact(&mut mode)?;

        let mut size = [0u8; 10];
        self.reader.read_exact(&mut size)?;

        let mut end_chars = [0u8; 2];
        self.reader.read_exact(&mut end_chars)?;

        println!("Member header terminator: {:02X} {:02X} (Expected: 60 0A)", end_chars[0], end_chars[1]);

        if &end_chars != b"`\n" {
            return Err(GaiaError::invalid_data("Invalid member header terminator"));
        }

        // Parse fields - fields in ar format are ASCII strings, padded with spaces on the right
        let name_str = std::str::from_utf8(&name).map_err(|_| GaiaError::invalid_data("Invalid name field"))?;
        // Names in ar format end with a slash, then padded with spaces
        let name = name_str.trim_end_matches(' ').trim_end_matches('/').to_string();

        let date_str = std::str::from_utf8(&date).map_err(|_| GaiaError::invalid_data("Invalid date field"))?;
        let timestamp = date_str.trim_end_matches(' ').parse::<u32>().unwrap_or(0);

        let uid_str = std::str::from_utf8(&uid).map_err(|_| GaiaError::invalid_data("Invalid user ID field"))?;
        let user_id = uid_str.trim_end_matches(' ').parse::<u16>().unwrap_or(0);

        let gid_str = std::str::from_utf8(&gid).map_err(|_| GaiaError::invalid_data("Invalid group ID field"))?;
        let group_id = gid_str.trim_end_matches(' ').parse::<u16>().unwrap_or(0);

        let mode_str = std::str::from_utf8(&mode).map_err(|_| GaiaError::invalid_data("Invalid mode field"))?;
        let mode = u32::from_str_radix(mode_str.trim_end_matches(' '), 8).unwrap_or(0); // Mode field is octal

        let size_str = std::str::from_utf8(&size).map_err(|_| GaiaError::invalid_data("Invalid size field"))?;
        let size = size_str.trim_end_matches(' ').parse::<u32>().unwrap_or(0);
        Ok(ArchiveMemberHeader { name, timestamp, user_id, group_id, mode, size })
    }
}