ndaal-binsec 3.2.3

Binary (in)security scanner for ELF/PE/Mach-O with native, strictly-validated SARIF 2.1.0 and Markdown output (ndaal fork of binsec)
Documentation
//! Implements the main interface necessary in order to parse binary inputs.
//! Should be used to detect format and security mitigations for a single binary.
#![allow(clippy::match_bool)]

use crate::check::{Analyze, GenericMap};

use goblin::mach::Mach;
use goblin::Object;

use byte_unit::Byte;
use chrono::{DateTime, Utc};
use serde_json::{json, Value};

use std::fs::{self, Metadata};
use std::path::{Path, PathBuf};

pub mod check;
pub mod sarif;

/// Output format selected on the command line.
///
/// Consumed by [`Detector::output`], which chooses the stdout representation
/// of the report from it.
// NOTE(review): the variant doc comments below are presumably picked up by
// clap's `ValueEnum` derive as per-value help text — confirm before rewording.
#[derive(Clone, Copy, Debug, PartialEq, Eq, clap::ValueEnum)]
pub enum Format {
    /// Human-readable tables (default).
    Table,
    /// Pretty-printed JSON report.
    Json,
    /// SARIF 2.1.0 static-analysis report (strictly validated before output).
    Sarif,
    /// Markdown rendered from the validated SARIF 2.1.0 report.
    Markdown,
}

/// Custom error type covering every kind of error that binsec might encounter.
///
/// The `#[from]` variants allow `?` to convert the underlying I/O, goblin and
/// serde_json errors into a `BinError` automatically.
#[derive(thiserror::Error, Debug)]
pub enum BinError {
    /// A filesystem or other I/O operation failed.
    #[error("IOError: `{0}`")]
    Io(#[from] std::io::Error),
    /// goblin reported an error (e.g. while parsing the binary).
    #[error("libgoblin: `{0}`")]
    Goblin(#[from] goblin::error::Error),
    /// serde_json failed to serialize or deserialize a document.
    #[error("serde: `{0}`")]
    Serde(#[from] serde_json::Error),
    /// Failure described by a free-form message; in this module it is used for
    /// unsupported file types, non-UTF-8 paths and a missing output directory.
    #[error("internal: `{0}`")]
    Internal(String),
    /// Catch-all variant; it is not constructed by the code visible in this module.
    // NOTE(review): the "data store" wording does not match this crate's domain —
    // presumably carried over from a template; confirm before relying on it.
    #[error("unknown data store error")]
    Unknown,
}

/// Convenience alias for a [`Result`] whose error type is [`BinError`].
pub type BinResult<R> = Result<R, BinError>;

/// Interfaces static analysis and wraps around parsed information for serialization.
///
/// Created by [`Detector::run`]. Each field holds one section of the report;
/// the struct derives `Serialize` and is what the JSON output serializes.
#[derive(serde::Serialize)]
pub struct Detector {
    /// File-level facts (absolute path, size, timestamp) plus the binary
    /// format, architecture and entry point added by the per-format helpers.
    basic: GenericMap,
    /// Result of the parsed binary's `compilation(&data)` check.
    compilation: GenericMap,
    /// Result of the parsed binary's `mitigations()` check.
    mitigations: GenericMap,
    /// Result of the parsed binary's `instrumentation()` check; the table
    /// output only prints this section when it is non-empty.
    instrumentation: GenericMap,
}

impl Detector {
    pub fn run(binpath: PathBuf) -> BinResult<Self> {
        // metadata shared by every binary format
        let basic_map: GenericMap = Self::base_metadata(&binpath)?;

        // read raw binary from path
        let data: Vec<u8> = std::fs::read(&binpath)?;

        // parse executable as format and run format-specific mitigation checks
        match Object::parse(&data)? {
            Object::Elf(elf) => Ok(Self {
                basic: Self::elf_basic(basic_map, &elf),
                compilation: elf.compilation(&data)?,
                mitigations: elf.mitigations(),
                instrumentation: elf.instrumentation(),
            }),
            Object::PE(pe) => Ok(Self {
                basic: Self::pe_basic(basic_map, &pe),
                compilation: pe.compilation(&data)?,
                mitigations: pe.mitigations(),
                instrumentation: pe.instrumentation(),
            }),
            Object::Mach(Mach::Binary(mach)) => Ok(Self {
                basic: Self::mach_basic(basic_map, &mach),
                compilation: mach.compilation(&data)?,
                mitigations: mach.mitigations(),
                instrumentation: mach.instrumentation(),
            }),
            bin => Err(BinError::Internal(format!(
                "unsupported filetype for analysis: {:?}",
                bin
            ))),
        }
    }

    /// Collect the path / size / last-modified metadata common to every
    /// supported binary format.
    ///
    /// Inserts `"Absolute Path"`, `"File Size"` and, when the modification
    /// time is available, `"Last Modified"` (converted to UTC and formatted
    /// as `%Y-%m-%d %H:%M:%S`).
    ///
    /// # Errors
    /// Returns an error if `binpath` cannot be canonicalized, the canonical
    /// path is not valid UTF-8, or the file metadata cannot be read.
    fn base_metadata(binpath: &Path) -> BinResult<GenericMap> {
        let mut basic_map: GenericMap = GenericMap::new();

        // get absolute path to executable
        let abspath_buf: PathBuf = fs::canonicalize(binpath)?;
        let abspath: &str = abspath_buf
            .to_str()
            .ok_or_else(|| BinError::Internal("path is not valid UTF-8".to_string()))?;
        basic_map.insert("Absolute Path".to_string(), json!(abspath));

        // parse out initial metadata used in all binary formats
        let metadata: Metadata = fs::metadata(binpath)?;

        // filesize with readable byte unit (u64 -> u128 is a lossless widening)
        let size: u128 = u128::from(metadata.len());
        let byte = Byte::from_bytes(size);
        let filesize: String = byte.get_appropriate_unit(false).to_string();
        basic_map.insert("File Size".to_string(), json!(filesize));

        // Readable *modification* timestamp. This must be `modified()`, not
        // `accessed()`: the access time changes whenever the file is read and
        // would be reported under the wrong label. Best effort: when the
        // timestamp cannot be obtained the entry is simply omitted.
        if let Ok(time) = metadata.modified() {
            let datetime: DateTime<Utc> = time.into();
            let stamp: String = datetime.format("%Y-%m-%d %H:%M:%S").to_string();
            basic_map.insert("Last Modified".to_string(), json!(stamp));
        }
        Ok(basic_map)
    }

    /// Populate the format-agnostic `basic` map with ELF header facts.
    fn elf_basic(mut basic_map: GenericMap, elf: &goblin::elf::Elf<'_>) -> GenericMap {
        use goblin::elf::header;

        basic_map.insert("Binary Format".to_string(), json!("ELF"));
        let arch: String = header::machine_to_str(elf.header.e_machine).to_string();
        basic_map.insert("Architecture".to_string(), json!(arch));
        let entry_point: String = format!("0x{:x}", elf.header.e_entry);
        basic_map.insert("Entry Point Address".to_string(), json!(entry_point));
        basic_map
    }

    /// Extend the shared `basic` map with facts read from the parsed PE:
    /// the binary format tag, whether the image is `PE32+` (`is_64`) or
    /// `PE32`, and the entry point rendered as lowercase hex with a `0x` prefix.
    fn pe_basic(mut basic_map: GenericMap, pe: &goblin::pe::PE<'_>) -> GenericMap {
        // The "Architecture" entry reports the PE flavour, not a CPU name.
        let flavour: &str = match pe.is_64 {
            true => "PE32+",
            false => "PE32",
        };

        basic_map.insert(String::from("Binary Format"), json!("PE/EXE"));
        basic_map.insert(String::from("Architecture"), json!(flavour));
        basic_map.insert(
            String::from("Entry Point Address"),
            json!(format!("0x{:x}", pe.entry)),
        );
        basic_map
    }

    /// Extend the shared `basic` map with facts read from the Mach-O header
    /// and load commands: the binary format tag, a CPU name (or `<unknown>`
    /// for CPU types not listed here) and the entry offset of the `Main`
    /// load command, when one is present.
    fn mach_basic(mut basic_map: GenericMap, mach: &goblin::mach::MachO<'_>) -> GenericMap {
        use goblin::mach::constants::cputype;
        use goblin::mach::load_command::CommandVariant;

        let arch_name: &str = match mach.header.cputype() {
            cputype::CPU_TYPE_I386 => "i386",
            cputype::CPU_TYPE_X86_64 => "x86_64",
            cputype::CPU_TYPE_ARM => "arm",
            cputype::CPU_TYPE_ARM64 => "arm64",
            _ => "<unknown>",
        };
        basic_map.insert(String::from("Binary Format"), json!("Mach-O"));
        basic_map.insert(String::from("Architecture"), json!(arch_name));

        // NOTE(review): the key is "Entry Point" here but "Entry Point Address"
        // for ELF and PE; presumably deliberate because `entryoff` is an offset
        // rather than an address — confirm before unifying the keys.
        let entry_offsets = mach.load_commands.iter().filter_map(|cmd| match cmd.command {
            CommandVariant::Main(entry) => Some(entry.entryoff),
            _ => None,
        });
        // Every `Main` command is inserted in order, so the last one wins.
        for offset in entry_offsets {
            basic_map.insert(String::from("Entry Point"), json!(format!("0x{:x}", offset)));
        }
        basic_map
    }

    /// Output the finalized report for the analysed executable.
    ///
    /// `--json <PATH>` (back-compat) always wins: it writes the pretty JSON
    /// report to `PATH`, or to stdout when `PATH` is `-`. Otherwise the
    /// `format` selects the stdout representation: human tables (default),
    /// JSON, a native SARIF 2.1.0 document, or Markdown rendered from that
    /// SARIF document. The SARIF document is validated before it is printed
    /// and before it is rendered to Markdown.
    ///
    /// # Errors
    /// Returns an error if serialization, SARIF construction or validation,
    /// Markdown rendering, or writing the JSON file fails.
    pub fn output(&self, json: Option<String>, format: Format) -> BinResult<()> {
        if let Some(path) = json {
            let output: String = serde_json::to_string_pretty(self)?;
            if path == "-" {
                println!("{output}");
            } else {
                fs::write(path, output)?;
            }
            return Ok(());
        }

        match format {
            Format::Json => {
                println!("{}", serde_json::to_string_pretty(self)?);
            }
            Format::Sarif => {
                let report: String = self.to_sarif(env!("CARGO_PKG_VERSION"))?;
                sarif::validate_sarif(&report)?;
                println!("{report}");
            }
            Format::Markdown => {
                let report: String = self.to_sarif(env!("CARGO_PKG_VERSION"))?;
                // Markdown must only ever be rendered from a validated SARIF
                // document, exactly as the SARIF output and `write_reports` do.
                sarif::validate_sarif(&report)?;
                let markdown: String = sarif::to_markdown(&report)?;
                println!("{markdown}");
            }
            Format::Table => {
                // will always be printed
                Detector::table("BASIC", &self.basic);
                Detector::table("COMPILATION", &self.compilation);
                Detector::table("EXPLOIT MITIGATIONS", &self.mitigations);

                // get instrumentation if any are set
                if !self.instrumentation.is_empty() {
                    Detector::table("INSTRUMENTATION", &self.instrumentation);
                }
            }
        }
        Ok(())
    }

    /// Render this detector's findings as a native SARIF 2.1.0 JSON string.
    ///
    /// The `"Absolute Path"` entry of the basic section is handed to
    /// `sarif::build` as the binary URI; when that entry is missing or is not
    /// a JSON string, an empty URI is passed instead. All four report
    /// sections are forwarded together with `tool_version`.
    ///
    /// # Errors
    /// Returns an error if SARIF construction or serialization fails.
    pub fn to_sarif(&self, tool_version: &str) -> BinResult<String> {
        let binary_uri: &str = match self.basic.get("Absolute Path") {
            Some(Value::String(path)) => path.as_str(),
            _ => "",
        };

        // Section name paired with its findings, in report order.
        let sections: [(&str, &GenericMap); 4] = [
            ("basic", &self.basic),
            ("compilation", &self.compilation),
            ("mitigations", &self.mitigations),
            ("instrumentation", &self.instrumentation),
        ];
        sarif::build(binary_uri, &sections, tool_version)
    }

    /// Auto-generate the `report.sarif` + `report.md` pair under
    /// `output_dir`, per the pipeline in `skills/rust-sarif.md`. The SARIF is
    /// strict-validated and the Markdown structurally validated before each
    /// file is written.
    ///
    /// # Errors
    /// Returns an error if `output_dir` is not a directory, validation fails,
    /// or a report file cannot be written.
    pub fn write_reports(&self, output_dir: &Path) -> BinResult<()> {
        if !output_dir.is_dir() {
            return Err(BinError::Internal(format!(
                "output directory does not exist: {}",
                output_dir.display()
            )));
        }
        let sarif_json: String = self.to_sarif(env!("CARGO_PKG_VERSION"))?;
        sarif::validate_sarif(&sarif_json)?;
        fs::write(output_dir.join("report.sarif"), &sarif_json)?;

        let markdown: String = sarif::to_markdown(&sarif_json)?;
        fs::write(output_dir.join("report.md"), &markdown)?;
        Ok(())
    }

    /// Print one titled report section to stdout as an aligned listing.
    ///
    /// The title `name` is framed by two rule lines, then every entry of
    /// `mapping` is printed with its key left-aligned and padded to 45
    /// columns. Booleans are shown as a green check or a red cross (ANSI
    /// colour escapes), strings are printed without quotes, and any other
    /// JSON value uses its JSON text form. A blank line ends the section.
    #[inline]
    pub fn table(name: &str, mapping: &GenericMap) {
        const RULE: &str = "-----------------------------------------------";

        println!("{RULE}");
        println!("{name}");
        println!("{RULE}\n");
        for (label, feature) in mapping {
            let rendered: String = match feature {
                Value::Bool(flag) => {
                    let mark: &str = if *flag {
                        "\x1b[0;32m✔️\x1b[0m"
                    } else {
                        "\x1b[0;31m✖️\x1b[0m"
                    };
                    mark.to_owned()
                }
                Value::String(text) => text.to_owned(),
                other => other.to_string(),
            };
            println!("{label: <45} {rendered}");
        }
        println!();
    }
}