nimrod 0.2.0

Parse and inspect Nim-compiled native binaries
Documentation
//! High-level [`NimBinary`] facade.
//!
//! Owns a parsed [`Container`], a cached [`DetectionReport`], and a lazy
//! cache holding the result of every scan accessor. Each scan runs at most
//! once per `NimBinary` instance — repeated calls return a reference to the
//! same cached value.

use std::sync::OnceLock;

use crate::{
    container::{Arch, Container, Format},
    detect::{DetectionMatches, DetectionReport},
    error::Result,
    inits::{self, InitFunction},
    metadata::{self, GcMode},
    modules::{self, ModuleMap},
    paths::{self, NimblePath},
    raises::{self, ExceptionRef},
    rtti::symbols::{self as rtti_symbols, RttiSymbol},
    shims::{self, EntryShim},
    sites::{self, RaiseSite},
    stacktrace::{self, StackTraceHarvest},
    strings::{v1 as strings_v1, v2 as strings_v2},
};

/// Lazy result cache. Each scan runs at most once per [`NimBinary`].
///
/// Every field is a [`OnceLock`] that starts empty (via the derived
/// `Default`) and is filled by the `NimBinary` accessor of the same name
/// the first time that accessor is called.
#[derive(Default)]
struct Cache {
    /// Result of `shims::scan`, filled by `NimBinary::entry_shims`.
    entry_shims: OnceLock<Vec<EntryShim>>,
    /// Result of `inits::scan`, filled by `NimBinary::init_functions`.
    init_functions: OnceLock<Vec<InitFunction>>,
    /// Result of `rtti_symbols::scan`, filled by `NimBinary::rtti_symbols`.
    rtti_symbols: OnceLock<Vec<RttiSymbol>>,
    /// Result of `strings_v2::scan`, filled by `NimBinary::string_literals_v2`.
    string_literals_v2: OnceLock<Vec<strings_v2::StringLiteral>>,
    /// Result of `strings_v1::scan`, filled by `NimBinary::string_literals_v1`.
    string_literals_v1: OnceLock<Vec<strings_v1::StringLiteralV1>>,
    /// Result of `stacktrace::harvest`, filled by `NimBinary::stack_trace`.
    stack_trace: OnceLock<StackTraceHarvest>,
    /// Result of `paths::scan`, filled by `NimBinary::nimble_paths`.
    nimble_paths: OnceLock<Vec<NimblePath>>,
    /// Result of `raises::scan`, filled by `NimBinary::exception_types`.
    exception_types: OnceLock<Vec<ExceptionRef>>,
    /// Result of `sites::scan`, filled by `NimBinary::raise_sites`.
    raise_sites: OnceLock<Vec<RaiseSite>>,
    /// Result of `modules::build`, filled by `NimBinary::module_map`.
    module_map: OnceLock<ModuleMap>,
}

/// Parsed view of a Nim-compiled native binary.
///
/// Construct via [`NimBinary::from_bytes`]. The type borrows from the input
/// byte slice — no copies of the original bytes are made.
///
/// # Caching
///
/// Every scan accessor (`entry_shims`, `init_functions`, `rtti_symbols`,
/// `string_literals_v2`, `string_literals_v1`, `stack_trace`,
/// `nimble_paths`, `exception_types`, `raise_sites`, `module_map`) runs
/// at most once. The first call performs the full scan and stores the
/// result; subsequent calls return a borrow of that same stored result
/// (a slice for most accessors, a single value for `stack_trace` and
/// `module_map`) in `O(1)`.
/// Caching is thread-safe (`OnceLock`).
///
/// # Address space
///
/// Every address-bearing field returned by `NimBinary` and its scan
/// methods is a **virtual address** in the input image's load space —
/// not a file offset. Most disassemblers (Binary Ninja, Ghidra, IDA)
/// work in image-relative space; convert with the `*_rva` helpers on
/// this type, with [`Container::va_to_rva`], or with the standalone
/// helper [`crate::va_to_i64`] if you need a signed-integer encoding
/// for persistence.
pub struct NimBinary<'a> {
    /// Parsed container view; borrows the input bytes for `'a`.
    container: Container<'a>,
    /// Detection report, computed once in [`NimBinary::from_bytes`].
    detection: DetectionReport,
    /// Lazily filled scan results backing the cached accessors.
    cache: Cache,
}

impl<'a> NimBinary<'a> {
    /// Builds a [`NimBinary`] view over `bytes`.
    ///
    /// The container (ELF, PE, or Mach-O) is parsed with
    /// [`Container::parse`], after which [`DetectionReport::run`] is
    /// executed over the parsed container. Construction succeeds whether
    /// or not the input looks like Nim: there is no early exit for
    /// non-Nim binaries, so callers should consult
    /// [`NimBinary::is_nim`] or [`NimBinary::detection`] afterwards.
    ///
    /// The scan cache starts out empty; no scan runs here.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Container::parse`] when the bytes
    /// cannot be parsed as a container.
    pub fn from_bytes(bytes: &'a [u8]) -> Result<Self> {
        let parsed = Container::parse(bytes)?;
        let report = DetectionReport::run(&parsed);
        Ok(Self {
            container: parsed,
            detection: report,
            cache: Cache::default(),
        })
    }

    /// The input byte slice this view was parsed from.
    pub fn as_bytes(&self) -> &'a [u8] {
        let Self { container, .. } = self;
        container.bytes()
    }

    /// Container format reported by the parsed container.
    pub fn format(&self) -> Format {
        let Self { container, .. } = self;
        container.format()
    }

    /// CPU architecture reported by the parsed container.
    pub fn arch(&self) -> Arch {
        let Self { container, .. } = self;
        container.arch()
    }

    /// Borrows the parsed container view.
    ///
    /// Intended mainly for downstream crates that need to walk sections
    /// or symbols themselves.
    pub fn container(&self) -> &Container<'a> {
        let Self { container, .. } = self;
        container
    }

    /// The `is_nim` verdict of the detection report: `true` if at least
    /// one detection probe matched.
    pub fn is_nim(&self) -> bool {
        self.detection().is_nim
    }

    /// Borrows the full detection report computed at construction time.
    pub fn detection(&self) -> &DetectionReport {
        let Self { detection, .. } = self;
        detection
    }

    /// A copy of the matched detection flags from the detection report.
    pub fn detection_matches(&self) -> DetectionMatches {
        self.detection().matches
    }

    /// Nim entry-point shims (`NimMain`, `PreMain`, etc.), as found by
    /// `shims::scan`.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn entry_shims(&self) -> &[EntryShim] {
        let Self {
            container, cache, ..
        } = self;
        cache.entry_shims.get_or_init(|| shims::scan(container))
    }

    /// Nim module init functions (`*Init000`, `*DatInit000`), as found
    /// by `inits::scan`.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn init_functions(&self) -> &[InitFunction] {
        let Self {
            container, cache, ..
        } = self;
        cache.init_functions.get_or_init(|| inits::scan(container))
    }

    /// GC / memory-management mode, inferred by `metadata::gc_mode`
    /// from the matched detection flags.
    ///
    /// Not cached; it is derived from the already-stored detection
    /// report on every call.
    pub fn gc_mode(&self) -> GcMode {
        metadata::gc_mode(self.detection_matches())
    }

    /// The `--nimMainPrefix` value, if one can be determined.
    ///
    /// `Some("")` means the default (empty) prefix, `Some(prefix)` a
    /// custom one, and `None` that `NimMain` was not found. Delegates to
    /// `metadata::nim_main_prefix` on every call (no caching).
    pub fn nim_main_prefix(&self) -> Option<&str> {
        let Self { container, .. } = self;
        metadata::nim_main_prefix(container)
    }

    /// All RTTI globals (`NTIv2_` and `NTI_`) enumerated from the symbol
    /// table by `rtti_symbols::scan`.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn rtti_symbols(&self) -> &[RttiSymbol] {
        let Self {
            container, cache, ..
        } = self;
        cache
            .rtti_symbols
            .get_or_init(|| rtti_symbols::scan(container))
    }

    /// V2 string literals (ARC/ORC builds) found in rodata by
    /// `strings_v2::scan`.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn string_literals_v2(&self) -> &[strings_v2::StringLiteral] {
        let Self {
            container, cache, ..
        } = self;
        cache
            .string_literals_v2
            .get_or_init(|| strings_v2::scan(container))
    }

    /// V1 string literals (refc builds, best-effort) found in rodata by
    /// `strings_v1::scan`.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn string_literals_v1(&self) -> &[strings_v1::StringLiteralV1] {
        let Self {
            container, cache, ..
        } = self;
        cache
            .string_literals_v1
            .get_or_init(|| strings_v1::scan(container))
    }

    /// Stack-trace file paths and proc names harvested from rodata by
    /// `stacktrace::harvest`.
    ///
    /// Cached: the harvest runs on the first call only; later calls
    /// return the stored value in `O(1)`.
    pub fn stack_trace(&self) -> &StackTraceHarvest {
        let Self {
            container, cache, ..
        } = self;
        cache
            .stack_trace
            .get_or_init(|| stacktrace::harvest(container))
    }

    /// `.nimble/pkgs` path leaks (build-host attribution) found by
    /// `paths::scan`.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn nimble_paths(&self) -> &[NimblePath] {
        let Self {
            container, cache, ..
        } = self;
        cache.nimble_paths.get_or_init(|| paths::scan(container))
    }

    /// Exception type name cstrings found by `raises::scan` (phase 1 of
    /// raise-site recovery).
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn exception_types(&self) -> &[ExceptionRef] {
        let Self {
            container, cache, ..
        } = self;
        cache
            .exception_types
            .get_or_init(|| raises::scan(container))
    }

    /// Full raise-site tuples (type, proc, file, line) recovered by
    /// `sites::scan` from call sites to `raiseExceptionEx` (phase 2).
    ///
    /// Only x86_64 and AArch64 are supported. The slice is empty on
    /// other architectures, or when `raiseExceptionEx` is absent from
    /// the symbol table.
    ///
    /// Cached: the scan runs on the first call only; later calls return
    /// the stored slice in `O(1)`.
    pub fn raise_sites(&self) -> &[RaiseSite] {
        let Self {
            container, cache, ..
        } = self;
        cache.raise_sites.get_or_init(|| sites::scan(container))
    }

    /// Cross-referenced module map produced by `modules::build` from
    /// init functions, demangled symbols, and stack-trace file paths.
    ///
    /// Cached: the map is built on the first call only; later calls
    /// return the stored value in `O(1)`.
    pub fn module_map(&self) -> &ModuleMap {
        let Self {
            container, cache, ..
        } = self;
        cache.module_map.get_or_init(|| modules::build(container))
    }

    /// The image base address. See [`Container::image_base`].
    pub fn image_base(&self) -> u64 {
        self.container().image_base()
    }

    /// Converts an [`EntryShim`]'s virtual address to an RVA
    /// (image-relative) via [`Container::va_to_rva`].
    ///
    /// Yields `None` when the shim's VA lies below the image base, which
    /// would indicate a malformed container.
    pub fn shim_rva(&self, shim: &EntryShim) -> Option<u64> {
        let va = shim.address;
        self.container.va_to_rva(va)
    }

    /// Converts an [`InitFunction`]'s virtual address to an RVA via
    /// [`Container::va_to_rva`]; `None` whenever that conversion fails.
    pub fn init_rva(&self, init: &InitFunction) -> Option<u64> {
        let va = init.address;
        self.container.va_to_rva(va)
    }

    /// Converts a [`RaiseSite`]'s call address to an RVA via
    /// [`Container::va_to_rva`]; `None` whenever that conversion fails.
    pub fn raise_rva(&self, site: &RaiseSite) -> Option<u64> {
        let va = site.call_addr;
        self.container.va_to_rva(va)
    }

    /// Converts an [`RttiSymbol`]'s virtual address to an RVA via
    /// [`Container::va_to_rva`]; `None` whenever that conversion fails.
    pub fn rtti_rva(&self, sym: &RttiSymbol) -> Option<u64> {
        let va = sym.address;
        self.container.va_to_rva(va)
    }
}

#[cfg(test)]
mod tests {
    use super::NimBinary;

    /// Bytes that are not a recognizable container must make
    /// `NimBinary::from_bytes` return an error rather than a view.
    #[test]
    fn from_bytes_rejects_garbage() {
        let garbage: &[u8] = b"not an elf or pe or macho";
        let result = NimBinary::from_bytes(garbage);
        assert!(result.is_err());
    }
}