#![allow(
clippy::type_complexity,
clippy::borrowed_box,
clippy::mutable_key_type,
clippy::collapsible_if,
clippy::collapsible_match
)]
extern crate core;
use core::fmt;
use std::{
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
path::PathBuf,
thread::spawn,
};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use smda::FileArchitecture;
use yaml_rust::Yaml;
/// Lazily compiled pattern that finds a `[...]` group and exposes the text
/// between the brackets as the named capture group `tag`.
///
/// The leading `[^]]*` skips any characters that are not a closing bracket,
/// so the bracketed group may be preceded by arbitrary descriptive text.
static TAG_BRACKET_RE: Lazy<regex::Regex> = Lazy::new(|| {
regex::Regex::new(r##"[^]]*\[(?P<tag>[^]]*)]"##)
.expect("compile-time regex literal — pattern is valid")
});
/// Lazily compiled pattern that splits a whole string into a leading part
/// (capture group 1, matched lazily) and an optional trailing `[...]` group
/// (capture group 2, the text between the brackets). Whitespace between the
/// leading part and the opening bracket is not captured.
static PARTS_ID_RE: Lazy<regex::Regex> = Lazy::new(|| {
regex::Regex::new(r"^(.*?)(?:\s*\[(.*?)])?$")
.expect("compile-time regex literal — pattern is valid")
});
use consts::FileFormat;
#[cfg(feature = "properties")]
use consts::Os;
use sede::{from_hex, to_hex};
pub use crate::error::Error;
use crate::security::options::status::SecurityCheckStatus;
pub(crate) mod consts;
mod error;
mod extractor;
pub mod flirt;
pub mod rules;
mod security;
mod sede;
pub type Result<T> = std::result::Result<T, Error>;
/// Versions of the Linux Standard Base (LSB) specification that can be
/// selected for the libc-related security checks.
///
/// The `Display` implementation renders each variant as
/// `"Linux Standard Base <major>.<minor>.<patch>"`, and `From<String>` accepts
/// the bare version string (for example `"4.1.0"`).
#[derive(Debug, Copy, Clone)]
pub enum LibCSpec {
// Version 1.0.0
LSB1,
// Version 1.1.0
LSB1dot1,
// Version 1.2.0
LSB1dot2,
// Version 1.3.0
LSB1dot3,
// Version 2.0.0
LSB2,
// Version 2.0.1
LSB2dot0dot1,
// Version 2.1.0
LSB2dot1,
// Version 3.0.0
LSB3,
// Version 3.1.0
LSB3dot1,
// Version 3.2.0
LSB3dot2,
// Version 4.0.0
LSB4,
// Version 4.1.0
LSB4dot1,
// Version 5.0.0
LSB5,
}
impl fmt::Display for LibCSpec {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let spec_name = match *self {
LibCSpec::LSB1
| LibCSpec::LSB1dot1
| LibCSpec::LSB1dot2
| LibCSpec::LSB1dot3
| LibCSpec::LSB2
| LibCSpec::LSB2dot0dot1
| LibCSpec::LSB2dot1
| LibCSpec::LSB3
| LibCSpec::LSB3dot1
| LibCSpec::LSB3dot2
| LibCSpec::LSB4
| LibCSpec::LSB4dot1
| LibCSpec::LSB5 => "Linux Standard Base",
};
let spec_version = match *self {
LibCSpec::LSB1 => "1.0.0",
LibCSpec::LSB1dot1 => "1.1.0",
LibCSpec::LSB1dot2 => "1.2.0",
LibCSpec::LSB1dot3 => "1.3.0",
LibCSpec::LSB2 => "2.0.0",
LibCSpec::LSB2dot0dot1 => "2.0.1",
LibCSpec::LSB2dot1 => "2.1.0",
LibCSpec::LSB3 => "3.0.0",
LibCSpec::LSB3dot1 => "3.1.0",
LibCSpec::LSB3dot2 => "3.2.0",
LibCSpec::LSB4 => "4.0.0",
LibCSpec::LSB4dot1 => "4.1.0",
LibCSpec::LSB5 => "5.0.0",
};
write!(f, "{spec_name} {spec_version}")
}
}
impl LibCSpec {
pub(crate) fn get_functions_with_checked_versions(self) -> &'static [&'static str] {
match self {
LibCSpec::LSB1
| LibCSpec::LSB1dot1
| LibCSpec::LSB1dot2
| LibCSpec::LSB1dot3
| LibCSpec::LSB2
| LibCSpec::LSB2dot0dot1
| LibCSpec::LSB2dot1
| LibCSpec::LSB3
| LibCSpec::LSB3dot1
| LibCSpec::LSB3dot2 => &[],
LibCSpec::LSB4 | LibCSpec::LSB4dot1 | LibCSpec::LSB5 => {
security::elf::checked_functions::LSB_4_0_0_FUNCTIONS_WITH_CHECKED_VERSIONS
}
}
}
}
/// Parses a bare `major.minor.patch` version string into a [`LibCSpec`].
///
/// Any string that is not a known version falls back to [`LibCSpec::LSB5`].
impl From<String> for LibCSpec {
    fn from(value: String) -> Self {
        // Version string -> specification lookup table.
        const KNOWN_VERSIONS: [(&str, LibCSpec); 13] = [
            ("1.0.0", LibCSpec::LSB1),
            ("1.1.0", LibCSpec::LSB1dot1),
            ("1.2.0", LibCSpec::LSB1dot2),
            ("1.3.0", LibCSpec::LSB1dot3),
            ("2.0.0", LibCSpec::LSB2),
            ("2.0.1", LibCSpec::LSB2dot0dot1),
            ("2.1.0", LibCSpec::LSB2dot1),
            ("3.0.0", LibCSpec::LSB3),
            ("3.1.0", LibCSpec::LSB3dot1),
            ("3.2.0", LibCSpec::LSB3dot2),
            ("4.0.0", LibCSpec::LSB4),
            ("4.1.0", LibCSpec::LSB4dot1),
            ("5.0.0", LibCSpec::LSB5),
        ];
        KNOWN_VERSIONS
            .iter()
            .find(|(version, _)| *version == value.as_str())
            .map_or(LibCSpec::LSB5, |&(_, spec)| spec)
    }
}
/// Options handed to the binary security checks.
///
/// Constructed with [`BinarySecurityCheckOptions::new`]; `input_file` is
/// filled in by `AnalyzeBuilder::from_file` just before the checks run.
#[derive(Clone, Debug)]
pub struct BinarySecurityCheckOptions {
// Optional path supplied by the caller.
// NOTE(review): presumably the C runtime library to inspect — confirm in `security`.
pub(crate) libc: Option<PathBuf>,
// Optional path supplied by the caller.
// NOTE(review): presumably the system root used to locate libraries — confirm in `security`.
pub(crate) sysroot: Option<PathBuf>,
// Optional LSB specification selecting the checked-function list.
pub(crate) libc_spec: Option<LibCSpec>,
// Always `false` when built through `new`.
// NOTE(review): presumably disables libc-dependent checks — confirm in `security`.
pub(crate) no_libc: bool,
// Path of the analyzed file; empty until `AnalyzeBuilder::from_file` sets it.
pub(crate) input_file: PathBuf,
}
impl BinarySecurityCheckOptions {
pub fn new(
libc: Option<PathBuf>,
sysroot: Option<PathBuf>,
libc_spec: Option<LibCSpec>,
) -> Self {
Self {
libc,
sysroot,
libc_spec,
no_libc: false,
input_file: PathBuf::new(),
}
}
}
impl Default for BinarySecurityCheckOptions {
fn default() -> Self {
Self::new(None, None, None)
}
}
/// Logger used when the caller does not install one: discards every message.
fn noop_logger(_message: &str) {}
/// Builds the predicate used to recognise library functions.
///
/// With a FLIRT matcher the predicate asks the matcher for the function at
/// the given address and returns the matched name as an owned `String`;
/// without one it never reports a library function.
fn make_library_filter<'a>(
    extractor: &'a (dyn extractor::Extractor + '_),
    flirt: Option<&'a flirt::FlirtMatcher>,
) -> Box<dyn Fn(u64) -> Option<String> + Sync + Send + 'a> {
    match flirt {
        Some(matcher) => Box::new(move |addr: u64| {
            let matched = matcher.match_function_at(addr, extractor);
            matched.map(str::to_owned)
        }),
        None => Box::new(|_addr: u64| None),
    }
}
/// Builder that configures and runs a capability analysis.
///
/// Obtain one with `AnalyzeBuilder::new()` or `FileCapabilities::analyze()`,
/// chain the setters, then finish with `from_file` or `from_buffer`.
pub struct AnalyzeBuilder<'a> {
// Location handed to `rules::RuleSet::new`; required, otherwise the
// terminal methods fail with `Error::BuilderMissingRules`.
rules: Option<String>,
// Forwarded unchanged to the extractor constructor.
high_accuracy: bool,
// Forwarded unchanged to the extractor constructor.
resolve_tailcalls: bool,
// Callback receiving progress messages; defaults to a no-op.
logger: &'a (dyn Fn(&str) + Sync + Send),
// When true, the extracted feature maps are copied into the result.
features_dump: bool,
// Options for the security checks run by `from_file`; defaults are used when `None`.
security_checks: Option<BinarySecurityCheckOptions>,
// Directory from which a FLIRT matcher is loaded when no prebuilt matcher is set.
flirt_signatures: Option<std::path::PathBuf>,
// Prebuilt, shareable FLIRT matcher; takes precedence over `flirt_signatures`.
flirt_matcher: Option<std::sync::Arc<flirt::FlirtMatcher>>,
}
/// The default builder has no rules, every switch off, a no-op logger and no
/// FLIRT or security-check configuration.
impl<'a> Default for AnalyzeBuilder<'a> {
    fn default() -> Self {
        Self {
            // Inputs that have no sensible default.
            rules: None,
            security_checks: None,
            flirt_signatures: None,
            flirt_matcher: None,
            // Switches start disabled.
            high_accuracy: false,
            resolve_tailcalls: false,
            features_dump: false,
            // Progress messages are dropped unless a logger is installed.
            logger: &noop_logger,
        }
    }
}
impl<'a> AnalyzeBuilder<'a> {
pub fn new() -> Self {
Self::default()
}
pub fn rules(mut self, path: impl Into<String>) -> Self {
self.rules = Some(path.into());
self
}
pub fn high_accuracy(mut self, on: bool) -> Self {
self.high_accuracy = on;
self
}
pub fn resolve_tailcalls(mut self, on: bool) -> Self {
self.resolve_tailcalls = on;
self
}
pub fn logger(mut self, logger: &'a (dyn Fn(&str) + Sync + Send)) -> Self {
self.logger = logger;
self
}
pub fn features_dump(mut self, on: bool) -> Self {
self.features_dump = on;
self
}
pub fn security_checks(mut self, opts: BinarySecurityCheckOptions) -> Self {
self.security_checks = Some(opts);
self
}
pub fn signatures(mut self, path: impl Into<std::path::PathBuf>) -> Self {
self.flirt_signatures = Some(path.into());
self
}
pub fn with_flirt_matcher(mut self, matcher: std::sync::Arc<flirt::FlirtMatcher>) -> Self {
self.flirt_matcher = Some(matcher);
self
}
/// Runs the configured analysis on the file at `file_name`.
///
/// Steps, in order: detect the file format, build the extractor, load the
/// rule set, run the security checks, prepare the optional FLIRT matcher and
/// finally match the rules against the extracted features.
///
/// # Errors
///
/// * `Error::BuilderMissingRules` when no rules location was configured.
/// * `Error::DescriptionEvaluationError` when loading the rule set fails or
///   the loading thread panics (the underlying cause is not preserved).
/// * Any error propagated from format detection, extraction, the security
///   checks, FLIRT loading or rule matching.
pub fn from_file(self, file_name: impl AsRef<str>) -> Result<FileCapabilities> {
    let rule_path = self.rules.ok_or(Error::BuilderMissingRules)?;
    let f = file_name.as_ref().to_string();
    let (format, buffer) = get_format(&f)?;
    let extractor = get_file_extractors(
        &f,
        format,
        &buffer,
        self.high_accuracy,
        self.resolve_tailcalls,
    )?;
    // The rule set is loaded on its own thread so that a panic while loading
    // surfaces as an error from `join` instead of unwinding through the caller.
    let rules_thread_handle = spawn(move || rules::RuleSet::new(&rule_path));
    let rules = match rules_thread_handle.join() {
        Ok(Ok(rules)) => rules,
        Ok(Err(_)) | Err(_) => return Err(Error::DescriptionEvaluationError),
    };
    let mut security_opts = self.security_checks.unwrap_or_default();
    security_opts.input_file = PathBuf::from(&f);
    let security_checks = security::get_security_checks(&f, &security_opts)?;
    // A prebuilt matcher wins; otherwise load one from the signatures
    // directory if one was configured.
    let owned_matcher = match (&self.flirt_matcher, self.flirt_signatures.as_ref()) {
        (Some(_), _) => None,
        (None, Some(p)) => Some(flirt::FlirtMatcher::from_directory(p, self.logger)?),
        (None, None) => None,
    };
    let flirt_ref: Option<&flirt::FlirtMatcher> =
        self.flirt_matcher.as_deref().or(owned_matcher.as_ref());
    let library_function = make_library_filter(&*extractor, flirt_ref);

    #[cfg(not(feature = "properties"))]
    let mut file_capabilities = FileCapabilities::new()?;
    #[cfg(feature = "properties")]
    let mut file_capabilities = FileCapabilities::new(&extractor, Some(&buffer))?;

    file_capabilities.security_checks = BTreeSet::from_iter(security_checks);
    let (capabilities, counts, map_features, library_funcs, map_features_by_scope) =
        find_capabilities(
            &rules,
            &extractor,
            &*library_function,
            self.logger,
            self.features_dump,
        )?;
    if self.features_dump {
        file_capabilities.map_features = map_features;
        file_capabilities.map_features_by_scope = map_features_by_scope;
    }
    file_capabilities.library_functions = library_funcs;
    // Only the signature of `update_capabilities` depends on the `verbose`
    // feature; everything else is shared between both configurations.
    #[cfg(not(feature = "verbose"))]
    {
        file_capabilities.update_capabilities(&capabilities)?;
    }
    #[cfg(feature = "verbose")]
    {
        file_capabilities.update_capabilities(&capabilities, &counts)?;
    }
    // `update_capabilities` only borrows `counts`, so it can be moved into the
    // report afterwards without cloning.
    file_capabilities.feature_counts = counts;
    Ok(file_capabilities)
}
/// Runs the configured analysis on a raw code buffer.
///
/// `raw`, `base_addr` and `bitness` are forwarded to the buffer-based
/// extractor. No security checks are run on this path, and with the
/// `properties` feature no raw file bytes are available for PE/Mach-O
/// metadata.
///
/// # Errors
///
/// * `Error::BuilderMissingRules` when no rules location was configured.
/// * `Error::DescriptionEvaluationError` when loading the rule set fails or
///   the loading thread panics (the underlying cause is not preserved).
/// * Any error propagated from extraction, FLIRT loading or rule matching.
pub fn from_buffer(self, raw: &[u8], base_addr: u64, bitness: u32) -> Result<FileCapabilities> {
    let rule_path = self.rules.ok_or(Error::BuilderMissingRules)?;
    let extractor: Box<dyn extractor::Extractor + '_> =
        Box::new(extractor::smda::Extractor::from_buffer(
            raw,
            base_addr,
            bitness,
            self.high_accuracy,
            self.resolve_tailcalls,
        )?);
    // The rule set is loaded on its own thread so that a panic while loading
    // surfaces as an error from `join` instead of unwinding through the caller.
    let rules_thread_handle = spawn(move || rules::RuleSet::new(&rule_path));
    let rules = match rules_thread_handle.join() {
        Ok(Ok(rules)) => rules,
        Ok(Err(_)) | Err(_) => return Err(Error::DescriptionEvaluationError),
    };
    // A prebuilt matcher wins; otherwise load one from the signatures
    // directory if one was configured.
    let owned_matcher = match (&self.flirt_matcher, self.flirt_signatures.as_ref()) {
        (Some(_), _) => None,
        (None, Some(p)) => Some(flirt::FlirtMatcher::from_directory(p, self.logger)?),
        (None, None) => None,
    };
    let flirt_ref: Option<&flirt::FlirtMatcher> =
        self.flirt_matcher.as_deref().or(owned_matcher.as_ref());
    let library_function = make_library_filter(&*extractor, flirt_ref);

    #[cfg(not(feature = "properties"))]
    let mut file_capabilities = FileCapabilities::new()?;
    #[cfg(feature = "properties")]
    let mut file_capabilities = FileCapabilities::new(&extractor, None)?;

    let (capabilities, counts, map_features, library_funcs, map_features_by_scope) =
        find_capabilities(
            &rules,
            &extractor,
            &*library_function,
            self.logger,
            self.features_dump,
        )?;
    if self.features_dump {
        file_capabilities.map_features = map_features;
        file_capabilities.map_features_by_scope = map_features_by_scope;
    }
    file_capabilities.library_functions = library_funcs;
    // Only the signature of `update_capabilities` depends on the `verbose`
    // feature; everything else is shared between both configurations.
    #[cfg(not(feature = "verbose"))]
    {
        file_capabilities.update_capabilities(&capabilities)?;
    }
    #[cfg(feature = "verbose")]
    {
        file_capabilities.update_capabilities(&capabilities, &counts)?;
    }
    // `update_capabilities` only borrows `counts`, so it can be moved into the
    // report afterwards without cloning.
    file_capabilities.feature_counts = counts;
    Ok(file_capabilities)
}
}
impl FileCapabilities {
/// Entry point of the analysis API: returns a builder with the default
/// configuration.
pub fn analyze<'a>() -> AnalyzeBuilder<'a> {
    AnalyzeBuilder::default()
}
/// Creates an empty report.
///
/// With the `properties` feature the file properties are read from
/// `extractor`; the PDB fields are taken from `smda::xmetadata::parse_pe`
/// when `raw` bytes are supplied and parsing yields metadata, and are left
/// at their defaults otherwise.
fn new<'a>(
    #[cfg(feature = "properties")] extractor: &Box<dyn extractor::Extractor + 'a>,
    #[cfg(feature = "properties")] raw: Option<&[u8]>,
) -> Result<FileCapabilities> {
    #[cfg(feature = "properties")]
    let properties = {
        let (pdb_guid, pdb_age, pdb_filename) = raw
            .and_then(smda::xmetadata::parse_pe)
            .map(|m| (m.pdb_guid, m.pdb_age, m.pdb_filename))
            .unwrap_or_default();
        Properties {
            format: FileCapabilities::get_format(extractor)?,
            arch: FileCapabilities::get_arch(extractor)?,
            os: FileCapabilities::get_os(extractor, raw)?,
            base_address: extractor.get_base_address()? as usize,
            pdb_guid,
            pdb_age,
            pdb_filename,
        }
    };
    Ok(FileCapabilities {
        #[cfg(feature = "properties")]
        properties,
        attacks: BTreeMap::new(),
        mbc: BTreeMap::new(),
        capability_namespaces: BTreeMap::new(),
        #[cfg(feature = "verbose")]
        features: 0,
        #[cfg(feature = "verbose")]
        functions_capabilities: BTreeMap::new(),
        tags: BTreeSet::new(),
        security_checks: BTreeSet::new(),
        map_features: HashMap::new(),
        capabilities_associations: BTreeMap::new(),
        library_functions: BTreeMap::new(),
        feature_counts: HashMap::new(),
        map_features_by_scope: HashMap::new(),
    })
}
fn update_capabilities(
&mut self,
capabilities: &HashMap<crate::rules::Rule, Vec<(u64, (bool, Vec<u64>))>>,
#[cfg(feature = "verbose")] counts: &HashMap<u64, usize>,
) -> Result<()> {
let re = &*TAG_BRACKET_RE;
for (rule, caps) in capabilities {
if rules::is_lib_rule(rule) {
continue;
}
let mut local_attacks_set: BTreeSet<Attacks> = BTreeSet::new();
let mut local_mbc_set: BTreeSet<Mbc> = BTreeSet::new();
if let Some(Yaml::Array(attacks)) = rule.meta.get(&Yaml::String("att&ck".to_string())) {
for p in attacks.iter().filter_map(|item| item.as_str()) {
if let Ok(attack) = Attacks::from_str(p) {
local_attacks_set.insert(attack);
}
let parts: Vec<&str> = p.split("::").collect();
if parts.len() > 1 {
let detail = parts[1..].join("::");
if let Some(caps) = re.captures(&detail) {
if let Some(tag_match) = caps.name("tag") {
self.tags.insert(tag_match.as_str().to_string());
}
}
self.attacks
.entry(parts[0].to_string())
.or_default()
.insert(detail);
}
}
}
if let Some(Yaml::Array(mbcs)) = rule.meta.get(&Yaml::String("mbc".to_string())) {
for p in mbcs.iter().filter_map(|item| item.as_str()) {
if let Ok(mbc) = Mbc::from_str(p) {
local_mbc_set.insert(mbc);
}
let parts: Vec<&str> = p.split("::").collect();
if parts.len() > 1 {
let detail = parts[1..].join("::");
if let Some(caps) = re.captures(&detail) {
if let Some(tag_match) = caps.name("tag") {
self.tags.insert(tag_match.as_str().to_string());
}
}
self.mbc
.entry(parts[0].to_string())
.or_default()
.insert(detail);
}
}
}
if let Some(Yaml::String(s)) = rule.meta.get(&Yaml::String("namespace".to_string())) {
self.capability_namespaces
.insert(rule.name.clone(), s.clone());
let first_non_zero_address = caps
.iter()
.find(|&&(addr, _)| addr != 0)
.map(|&(addr, _)| addr)
.unwrap_or(0);
let _ = self
.capabilities_associations
.entry(rule.name.clone())
.or_insert_with(|| CapabilityAssociation {
attack: local_attacks_set.clone(),
mbc: local_mbc_set.clone(),
namespace: s.clone(),
name: rule.name.clone(),
address: first_non_zero_address as usize,
});
}
#[cfg(feature = "verbose")]
{
for &(addr, _) in caps {
if addr != 0 {
self.functions_capabilities
.entry(addr)
.and_modify(|fc| {
fc.capabilities.push(rule.name.clone());
})
.or_insert_with(|| FunctionCapabilities {
address: addr as usize,
features: *counts.get(&addr).unwrap_or(&0),
capabilities: vec![rule.name.clone()],
});
}
}
self.features = counts[&0];
}
}
Ok(())
}
/// Builds the JSON object that maps each rule name to its association
/// (ATT&CK entries, MBC entries, namespace, name and address).
///
/// When `filter` is given it is split on `|`, and `self.map_features` is
/// pruned in place to the keys that contain at least one of the resulting
/// substrings. The filter does not affect the returned associations.
pub fn construct_json_for_capabilities_associations(
    &mut self,
    filter: Option<String>,
) -> Value {
    if let Some(pattern) = filter.as_deref() {
        self.map_features
            .retain(|key, _| pattern.split('|').any(|needle| key.contains(needle)));
    }
    let rules: serde_json::Map<String, Value> = self
        .capabilities_associations
        .iter()
        .map(|(name, association)| {
            let attacks_json: Vec<Value> = association
                .attack
                .iter()
                .map(|a| {
                    json!({
                        "id": a.id,
                        "subtechnique": a.subtechnique,
                        "tactic": a.tactic,
                        "technique": a.technique,
                    })
                })
                .collect();
            let mbc_json: Vec<Value> = association
                .mbc
                .iter()
                .map(|m| {
                    json!({
                        "objective": m.objective,
                        "behavior": m.behavior,
                        "method": m.method,
                        "id": m.id,
                    })
                })
                .collect();
            let entry = json!({
                "attacks": attacks_json,
                "mbc": mbc_json,
                "namespace": association.namespace,
                "name": association.name,
                "address": association.address,
            });
            (name.clone(), entry)
        })
        .collect();
    Value::Object(rules)
}
pub fn serialize_file_capabilities(
&mut self,
filter: Option<String>,
) -> serde_json::Result<String> {
let associations_json = self.construct_json_for_capabilities_associations(filter);
let mut fc_json = serde_json::to_value(self.clone())?;
fc_json
.as_object_mut()
.unwrap()
.insert("rules".to_string(), associations_json);
if let Some(map_features) = fc_json.get("map_features") {
if map_features.as_object().is_some_and(|m| m.is_empty()) {
fc_json.as_object_mut().unwrap().remove("map_features");
}
}
serde_json::to_string(&fc_json)
}
/// Returns the file format reported by the extractor.
#[cfg(feature = "properties")]
fn get_format(extractor: &Box<dyn extractor::Extractor + '_>) -> Result<FileFormat> {
    let detected = extractor.format();
    Ok(detected)
}
/// Returns the architecture reported by the extractor, propagating its error.
#[cfg(feature = "properties")]
fn get_arch(extractor: &Box<dyn extractor::Extractor + '_>) -> Result<FileArchitecture> {
    let arch = extractor.arch()?;
    Ok(arch)
}
/// Derives the target operating system from the extractor's file format.
///
/// PE and .NET map to Windows. Mach-O is classified from the raw bytes when
/// they are available and defaults to macOS otherwise. Every other format
/// maps to Linux.
#[cfg(feature = "properties")]
fn get_os(extractor: &Box<dyn extractor::Extractor + '_>, raw: Option<&[u8]>) -> Result<Os> {
    match (extractor.format(), raw) {
        (FileFormat::PE | FileFormat::DOTNET, _) => Ok(Os::WINDOWS),
        (FileFormat::Macho, Some(bytes)) => extractor::smda::classify_macho_os(bytes),
        (FileFormat::Macho, None) => Ok(Os::MACOS),
        _ => Ok(Os::LINUX),
    }
}
}
/// Extracts the features of one function and matches the basic-block and
/// function rules against them.
///
/// Returns `(function_matches, bb_matches, feature_count)` where
/// `feature_count` is the number of distinct features accumulated for the
/// function (including the matched-rule features added for basic-block
/// matches).
///
/// When `features_dump` is true, every extracted `(feature, address)` pair is
/// also appended to `map_features_by_scope` under the scope name used at the
/// extraction site ("function", "file", "basic_block" or "instruction").
fn find_function_capabilities<'a>(
ruleset: &'a rules::RuleSet,
extractor: &Box<dyn extractor::Extractor + '_>,
f: &Box<dyn extractor::Function>,
logger: &(dyn Fn(&str) + Sync + Send),
map_features_by_scope: &mut HashMap<&'static str, HashMap<rules::features::Feature, Vec<u64>>>,
features_dump: bool,
) -> Result<(
HashMap<&'a rules::Rule, Vec<(u64, (bool, Vec<u64>))>>,
HashMap<&'a rules::Rule, Vec<(u64, (bool, Vec<u64>))>>,
usize,
)> {
// Accumulates every feature seen anywhere in the function.
let mut function_features: HashMap<rules::features::Feature, Vec<u64>> = HashMap::new();
// Global features are part of the function scope (and dumped as such).
for (feature, va) in extractor.extract_global_features()? {
if features_dump {
map_features_by_scope
.entry("function")
.or_default()
.entry(feature.clone())
.or_default()
.push(va);
}
function_features.entry(feature).or_default().push(va);
}
// Features attached to the function itself.
for (feature, va) in extractor.extract_function_features(f)? {
if features_dump {
map_features_by_scope
.entry("function")
.or_default()
.entry(feature.clone())
.or_default()
.push(va);
}
function_features.entry(feature).or_default().push(va);
}
// For .NET inputs the file-level features are also made visible to the
// function rules (dumped under the "file" scope).
if extractor.is_dot_net() {
for (feature, va) in extractor.extract_file_features()? {
if features_dump {
map_features_by_scope
.entry("file")
.or_default()
.entry(feature.clone())
.or_default()
.push(va);
}
function_features.entry(feature).or_default().push(va);
}
}
let blocks = extractor.get_basic_blocks(f)?;
let mut bb_matches: HashMap<&crate::rules::Rule, Vec<(u64, (bool, Vec<u64>))>> = HashMap::new();
for bb in blocks.iter() {
// Features visible to the basic-block rules for this block only.
let mut bb_features: HashMap<crate::rules::features::Feature, Vec<u64>> = HashMap::new();
// Block features plus the global features; both are also appended to the
// function-level accumulator, so global features are pushed once per block
// in addition to the initial pass above.
for (feature, va) in itertools::chain!(
extractor.extract_basic_block_features(f, &bb)?,
extractor.extract_global_features()?
) {
if features_dump {
map_features_by_scope
.entry("basic_block")
.or_default()
.entry(feature.clone())
.or_default()
.push(va);
}
bb_features.entry(feature.clone()).or_default().push(va);
function_features.entry(feature).or_default().push(va);
}
// Instruction features bubble up into both the block and the function.
let insns = extractor.get_instructions(f, &bb)?;
for insn in insns.iter() {
for (feature, va) in extractor.extract_insn_features(f, insn)? {
if features_dump {
map_features_by_scope
.entry("instruction")
.or_default()
.entry(feature.clone())
.or_default()
.push(va);
}
bb_features.entry(feature.clone()).or_default().push(va);
function_features.entry(feature).or_default().push(va);
}
}
// Match the basic-block rules at the block's first tuple field (`bb.0`).
let (_, matches) = match_fn(&ruleset.basic_block_rules, &bb_features, bb.0, logger)?;
for (rule, res) in matches {
bb_matches
.entry(rule)
.or_default()
.extend(res.iter().cloned());
// Expose the block-level match to the function rules as a
// matched-rule feature located at the match addresses.
index_rule_matches(
&mut function_features,
rule,
res.iter().map(|&(va, _)| va).collect(),
)?;
}
}
// Finally match the function rules at the function's offset.
let (_, function_matches) = match_fn(
&ruleset.function_rules,
&function_features,
&f.offset(),
logger,
)?;
Ok((function_matches, bb_matches, function_features.len()))
}
/// Appends every result list in `new_matches` to the list stored for the
/// same rule in `all_matches`, creating the list when the rule is new.
fn aggregate_matches<'a, T: Clone>(
    all_matches: &mut HashMap<&'a rules::Rule, Vec<T>>,
    new_matches: &HashMap<&'a rules::Rule, Vec<T>>,
) {
    new_matches.iter().for_each(|(rule, results)| {
        let merged = all_matches.entry(*rule).or_default();
        merged.extend(results.iter().cloned());
    });
}
/// Matches `ruleset` against every non-library function of the extracted
/// file and then against the file as a whole.
///
/// The rule set is first pruned with the global features. Functions for which
/// `library_function` returns a name are skipped and reported separately; the
/// remaining functions are processed in parallel.
///
/// Returns, in order: the matches per rule (owned rule clones), the number of
/// distinct features per function address (address 0 holds the file-level
/// count), the flat feature dump, the skipped library functions by address,
/// and the scope-keyed feature dump. Both dumps are empty unless
/// `features_dump` is true.
fn find_capabilities(
ruleset: &rules::RuleSet,
extractor: &Box<dyn extractor::Extractor + '_>,
library_function: &(dyn Fn(u64) -> Option<String> + Sync + Send),
logger: &(dyn Fn(&str) + Sync + Send),
features_dump: bool,
) -> Result<(
HashMap<rules::Rule, Vec<(u64, (bool, Vec<u64>))>>,
HashMap<u64, usize>,
// 0.5.0 (D3): flat dump, kept for backwards compatibility. It is derived
// from the scope-keyed accumulator by collapsing the scope keys.
HashMap<String, HashMap<String, HashSet<u64>>>,
BTreeMap<u64, String>,
// 0.5.0 (D3): scope-keyed dump. The outer key is the scope name
// ("file" | "function" | "basic_block" | "instruction"); the inner
// shape matches the flat dump. Empty when `features_dump` is
// false. Gives parity with Python capa's per-scope feature breakdown.
HashMap<String, HashMap<String, HashMap<String, HashSet<u64>>>>,
)> {
use rayon::prelude::*;
// Collect the global features once; they drive the rule pruning below.
let mut globals_map: HashMap<crate::rules::features::Feature, Vec<u64>> = HashMap::new();
for (feat, va) in extractor.extract_global_features()? {
globals_map.entry(feat).or_default().push(va);
}
// From here on `ruleset` refers to the pruned rule set.
let pruned_ruleset = ruleset.filter_rules_by_meta_features(&globals_map)?;
let ruleset: &rules::RuleSet = &pruned_ruleset;
let mut all_function_matches: HashMap<&rules::Rule, Vec<(u64, (bool, Vec<u64>))>> =
HashMap::new();
let mut all_bb_matches: HashMap<&rules::Rule, Vec<(u64, (bool, Vec<u64>))>> = HashMap::new();
// Address -> number of distinct features (0 is used for the file level).
let mut meta = HashMap::new();
let functions = extractor.get_functions()?;
logger("functions capabilities started");
let mut map_features_by_scope: HashMap<
&'static str,
HashMap<crate::rules::features::Feature, Vec<u64>>,
> = HashMap::new();
let mut library_functions: BTreeMap<u64, String> = BTreeMap::new();
let mut real_functions: Vec<_> = Vec::with_capacity(functions.len());
let total_input = functions.len();
// Partition the functions into recognised library functions (skipped) and
// the functions that are actually analysed.
for (addr, f) in functions.iter() {
match library_function(*addr) {
Some(name) => {
logger(&format!(
"flirt: skipping library function at 0x{:02x} as {}",
addr, name
));
library_functions.insert(*addr, name);
}
None => {
real_functions.push((addr, f));
}
}
}
logger(&format!(
"flirt: {} library / {} non-library out of {} total functions",
library_functions.len(),
real_functions.len(),
total_input
));
let function_list = real_functions;
let total = function_list.len();
// Analyse the functions in parallel. Each task fills its own scope map so
// no shared mutable state is needed; the first error aborts the collection.
let per_function: Vec<(u64, _, _, usize, HashMap<_, _>)> = function_list
.par_iter()
.enumerate()
.map(|(index, (function_address, f))| -> Result<_> {
let mut local_by_scope: HashMap<
&'static str,
HashMap<crate::rules::features::Feature, Vec<u64>>,
> = HashMap::new();
let (function_matches, bb_matches, feature_count) = find_function_capabilities(
ruleset,
extractor,
f,
logger,
&mut local_by_scope,
features_dump,
)?;
logger(&format!(
"function 0x{:02x} {} from {} processed",
function_address, index, total
));
Ok((
**function_address,
function_matches,
bb_matches,
feature_count,
local_by_scope,
))
})
.collect::<Result<Vec<_>>>()?;
// Merge the per-function results sequentially.
for (addr, function_matches, bb_matches, feature_count, local_by_scope) in per_function {
meta.insert(addr, feature_count);
aggregate_matches(&mut all_function_matches, &function_matches);
aggregate_matches(&mut all_bb_matches, &bb_matches);
if features_dump {
for (scope, scope_map) in local_by_scope {
let dest = map_features_by_scope.entry(scope).or_default();
for (k, v) in scope_map {
dest.entry(k).or_default().extend(v);
}
}
}
}
logger("functions capabilities finish");
// Turn every function and basic-block match into matched-rule features so
// that the file rules can refer to them.
let mut function_and_lower_features = HashMap::new();
for (rule, results) in itertools::chain!(&all_function_matches, &all_bb_matches) {
let locations: Vec<u64> = results.iter().map(|a| a.0).collect();
index_rule_matches(&mut function_and_lower_features, rule, locations)?;
}
let (all_file_matches, feature_count) = find_file_capabilities(
ruleset,
extractor,
&function_and_lower_features,
logger,
&mut map_features_by_scope,
features_dump,
)?;
// Flatten all scopes into one map of owned rules. A rule present in more
// than one scope map keeps the entry from the scope chained last.
let mut matches = HashMap::new();
for (rule, res) in itertools::chain!(&all_bb_matches, &all_function_matches, &all_file_matches)
{
matches.insert((*rule).clone(), res.clone());
}
// Address 0 carries the file-level feature count.
meta.insert(0, feature_count);
// Convert the feature-keyed accumulators into string-keyed dumps:
// feature type -> feature value -> set of addresses, once per scope and
// once flattened across scopes.
let mut map_features_string: HashMap<String, HashMap<String, HashSet<u64>>> = HashMap::new();
let mut map_features_by_scope_string: HashMap<
String,
HashMap<String, HashMap<String, HashSet<u64>>>,
> = HashMap::new();
for (scope, scope_map) in &map_features_by_scope {
let scope_string = (*scope).to_string();
let scope_dest = map_features_by_scope_string
.entry(scope_string)
.or_default();
for (key, offsets) in scope_map {
let feature_type = key.get_name();
let feature_value = key.get_value()?;
let by_value = scope_dest
.entry(feature_type.clone())
.or_default()
.entry(feature_value.clone())
.or_default();
for offset in offsets {
by_value.insert(*offset);
}
let flat_by_value = map_features_string
.entry(feature_type)
.or_default()
.entry(feature_value)
.or_default();
for offset in offsets {
flat_by_value.insert(*offset);
}
}
}
Ok((
matches,
meta,
map_features_string,
library_functions,
map_features_by_scope_string,
))
}
/// Matches the file rules against the file-level and global features
/// combined with the features gathered from functions and basic blocks.
///
/// Returns the matches (located at address 0) together with the number of
/// distinct features that were visible to the file rules. When
/// `features_dump` is true the file and global features are also appended to
/// `map_features_by_scope` under the "file" scope.
fn find_file_capabilities<'a>(
    ruleset: &'a rules::RuleSet,
    extractor: &Box<dyn extractor::Extractor + '_>,
    function_features: &HashMap<rules::features::Feature, Vec<u64>>,
    logger: &(dyn Fn(&str) + Sync + Send),
    map_features_by_scope: &mut HashMap<&'static str, HashMap<rules::features::Feature, Vec<u64>>>,
    features_dump: bool,
) -> Result<(
    HashMap<&'a rules::Rule, Vec<(u64, (bool, Vec<u64>))>>,
    usize,
)> {
    let mut file_features: HashMap<rules::features::Feature, Vec<u64>> = HashMap::new();

    // File-level features first, then the global ones.
    let file_and_global = itertools::chain!(
        extractor.extract_file_features()?,
        extractor.extract_global_features()?
    );
    for (feature, va) in file_and_global {
        if features_dump {
            let file_scope = map_features_by_scope.entry("file").or_default();
            file_scope.entry(feature.clone()).or_default().push(va);
        }
        file_features.entry(feature).or_default().push(va);
    }

    // Features (including matched rules) bubbled up from lower scopes.
    for (feature, addresses) in function_features {
        let locations = file_features.entry(feature.clone()).or_default();
        locations.extend(addresses.iter().copied());
    }

    let (_, matches) = match_fn(&ruleset.file_rules, &file_features, &0x0, logger)?;
    let feature_count = file_features.len();
    Ok((matches, feature_count))
}
/// Per-function summary recorded when the `verbose` feature is enabled.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FunctionCapabilities {
// Function address; (de)serialized through the `to_hex` / `from_hex` helpers.
#[serde(serialize_with = "to_hex", deserialize_with = "from_hex")]
address: usize,
// Feature count recorded for this address (0 when none was recorded).
features: usize,
// Names of the rules that matched at this address.
capabilities: Vec<String>,
}
fn parse_parts_id(s: &str) -> Result<(Vec<String>, String)> {
let re = &*PARTS_ID_RE;
if let Some(caps) = re.captures(s) {
let parts_str = caps.get(1).map_or("", |m| m.as_str());
let parts: Vec<String> = parts_str
.split("::")
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.collect();
let id = caps.get(2).map_or("", |m| m.as_str()).to_string();
Ok((parts, id))
} else {
Err(Error::InvalidRule(0, s.to_string()))
}
}
/// Basic properties of the analyzed file (only with the `properties` feature).
#[cfg(feature = "properties")]
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct Properties {
// File format reported by the extractor.
pub format: FileFormat,
// Architecture reported by the extractor.
pub arch: FileArchitecture,
// Operating system derived from the file format (and, for Mach-O, the raw bytes).
pub os: Os,
// Base address reported by the extractor; (de)serialized through `to_hex` / `from_hex`.
#[serde(serialize_with = "to_hex", deserialize_with = "from_hex")]
pub base_address: usize,
// PDB GUID taken from the parsed PE metadata; omitted from the output when absent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pdb_guid: Option<String>,
// PDB age taken from the parsed PE metadata; omitted from the output when absent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pdb_age: Option<u32>,
// PDB file name taken from the parsed PE metadata; omitted from the output when absent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pdb_filename: Option<String>,
}
/// One parsed `att&ck` metadata entry of a rule.
///
/// `Attacks::from_str` fills `tactic`, `technique` and `subtechnique` from the
/// first, second and third `::`-separated part of the entry, and `id` from the
/// trailing bracketed group. Missing pieces are left empty.
#[derive(Debug, Serialize, Deserialize, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct Attacks {
// Text of the trailing `[...]` group, or empty.
pub id: String,
// Third `::`-separated part, or empty.
pub subtechnique: String,
// First `::`-separated part, or empty.
pub tactic: String,
// Second `::`-separated part, or empty.
pub technique: String,
}
impl Attacks {
fn from_str(s: &str) -> Result<Self> {
let (parts, id) = parse_parts_id(s)?;
let tactic = parts.first().cloned().unwrap_or_default();
let technique = parts.get(1).cloned().unwrap_or_default();
let subtechnique = parts.get(2).cloned().unwrap_or_default();
Ok(Self {
tactic,
technique,
subtechnique,
id,
})
}
}
impl Default for Attacks {
fn default() -> Self {
Attacks {
id: "".to_string(),
subtechnique: "".to_string(),
tactic: "".to_string(),
technique: "".to_string(),
}
}
}
/// One parsed `mbc` metadata entry of a rule.
///
/// `Mbc::from_str` fills `objective`, `behavior` and `method` from the first,
/// second and third `::`-separated part of the entry, and `id` from the
/// trailing bracketed group. Missing pieces are left empty.
#[derive(Debug, Serialize, Deserialize, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct Mbc {
// Second `::`-separated part, or empty.
pub behavior: String,
// Text of the trailing `[...]` group, or empty.
pub id: String,
// First `::`-separated part, or empty.
pub objective: String,
// Third `::`-separated part, or empty.
pub method: String,
}
impl Default for Mbc {
fn default() -> Self {
Mbc {
behavior: "".to_string(),
id: "".to_string(),
objective: "".to_string(),
method: "".to_string(),
}
}
}
impl Mbc {
fn from_str(s: &str) -> Result<Self> {
let (parts, id) = parse_parts_id(s)?;
let objective = parts.first().cloned().unwrap_or_default();
let behavior = parts.get(1).cloned().unwrap_or_default();
let method = parts.get(2).cloned().unwrap_or_default();
Ok(Self {
objective,
behavior,
method,
id,
})
}
}
/// Everything recorded about one matched rule that has a namespace.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CapabilityAssociation {
// Successfully parsed `att&ck` metadata entries of the rule.
pub attack: BTreeSet<Attacks>,
// Successfully parsed `mbc` metadata entries of the rule.
pub mbc: BTreeSet<Mbc>,
// Value of the rule's `namespace` metadata.
pub namespace: String,
// Rule name.
pub name: String,
// First non-zero match address of the rule, or 0 when there is none.
pub address: usize,
}
/// Result of a capability analysis.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct FileCapabilities {
// Basic file properties (only with the `properties` feature).
#[cfg(feature = "properties")]
pub properties: Properties,
// `att&ck` entries of matched rules: text before the first `::` -> set of remainders.
pub attacks: BTreeMap<String, BTreeSet<String>>,
// `mbc` entries of matched rules: text before the first `::` -> set of remainders.
pub mbc: BTreeMap<String, BTreeSet<String>>,
// Rule name -> namespace, for matched rules that declare a namespace.
pub capability_namespaces: BTreeMap<String, String>,
// File-level feature count (only with the `verbose` feature).
#[cfg(feature = "verbose")]
pub features: usize,
// Function address -> per-function summary (only with the `verbose` feature).
#[cfg(feature = "verbose")]
pub functions_capabilities: BTreeMap<u64, FunctionCapabilities>,
// Bracketed identifiers extracted from the `att&ck` / `mbc` entries.
pub tags: BTreeSet<String>,
// Results of the security checks; filled by `AnalyzeBuilder::from_file` only.
pub security_checks: BTreeSet<SecurityCheckStatus>,
// Flat feature dump: feature type -> feature value -> addresses.
// Filled only when the feature dump was requested.
pub map_features: HashMap<String, HashMap<String, HashSet<u64>>>,
// Rule name -> association details.
pub capabilities_associations: BTreeMap<String, CapabilityAssociation>,
// Address -> name of the functions skipped as library code; omitted from the output when empty.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub library_functions: BTreeMap<u64, String>,
// Address -> number of distinct features (address 0 is the file level);
// omitted from the output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub feature_counts: HashMap<u64, usize>,
// Scope name -> flat dump for that scope; omitted from the output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub map_features_by_scope: HashMap<String, HashMap<String, HashMap<String, HashSet<u64>>>>,
}
/// Evaluates `rules` in order against `features` at address `va`.
///
/// Each matching rule is recorded with its evaluation result and is
/// immediately added to the working feature set as a matched-rule feature,
/// so later rules in the slice can depend on earlier ones. Rules whose
/// evaluation fails are skipped.
///
/// Returns the extended copy of the feature set and the matches per rule.
fn match_fn<'a>(
    rules: &'a [rules::Rule],
    features: &HashMap<rules::features::Feature, Vec<u64>>,
    va: &u64,
    logger: &(dyn Fn(&str) + Sync + Send),
) -> Result<(
    HashMap<rules::features::Feature, Vec<u64>>,
    HashMap<&'a rules::Rule, Vec<(u64, (bool, Vec<u64>))>>,
)> {
    let address = *va;
    let rule_count = rules.len();
    let mut matched: HashMap<&rules::Rule, Vec<(u64, (bool, Vec<u64>))>> = HashMap::new();
    // Work on a private copy so the caller's feature set stays untouched.
    let mut working = features.clone();
    for (position, rule) in rules.iter().enumerate() {
        logger(&format!(
            "\t\t\tmatches rule {} from {}",
            position, rule_count
        ));
        let Ok(outcome) = rule.evaluate(&working) else {
            continue;
        };
        if !outcome.0 {
            continue;
        }
        matched.entry(rule).or_default().push((address, outcome));
        index_rule_matches(&mut working, rule, vec![address])?;
    }
    Ok((working, matched))
}
/// Records a rule match in `features` so that other rules can refer to it.
///
/// `locations` are appended to the matched-rule feature named after the rule
/// and, when the rule has a string `namespace`, to one matched-rule feature
/// per namespace prefix (`a`, `a/b`, `a/b/c`, ...).
fn index_rule_matches(
    features: &mut HashMap<rules::features::Feature, Vec<u64>>,
    rule: &rules::Rule,
    locations: Vec<u64>,
) -> Result<()> {
    let rule_feature = rules::features::Feature::MatchedRule(
        rules::features::MatchedRuleFeature::new(&rule.name, "")?,
    );
    features
        .entry(rule_feature)
        .or_default()
        .extend(locations.iter().copied());

    let namespace_key = Yaml::String("namespace".to_string());
    let Some(Yaml::String(namespace)) = rule.meta.get(&namespace_key) else {
        return Ok(());
    };
    let segments: Vec<&str> = namespace.split('/').collect();
    for depth in 1..=segments.len() {
        let prefix = segments[..depth].join("/");
        let prefix_feature = rules::features::Feature::MatchedRule(
            rules::features::MatchedRuleFeature::new(&prefix, "")?,
        );
        features
            .entry(prefix_feature)
            .or_default()
            .extend(locations.iter().copied());
    }
    Ok(())
}
fn get_format(f: &str) -> Result<(FileFormat, Vec<u8>)> {
let buffer = std::fs::read(f)?;
if buffer.starts_with(b"MZ") {
Ok((FileFormat::PE, buffer))
} else if buffer.starts_with(b"\x7fELF") {
Ok((FileFormat::ELF, buffer))
} else if is_macho_magic(&buffer) {
Ok((FileFormat::Macho, buffer))
} else {
Err(Error::UnsupportedFormatError)
}
}
/// Reports whether `buf` starts with one of the eight accepted 4-byte magic
/// values (read here as a big-endian `u32`): `feedface`, `feedfacf`,
/// `cafebabe`, `cafebabf`, and the byte-swapped form of each.
///
/// Buffers shorter than four bytes never match.
fn is_macho_magic(buf: &[u8]) -> bool {
    match buf {
        [b0, b1, b2, b3, ..] => matches!(
            u32::from_be_bytes([*b0, *b1, *b2, *b3]),
            0xfeed_face
                | 0xcefa_edfe
                | 0xfeed_facf
                | 0xcffa_edfe
                | 0xcafe_babe
                | 0xbeba_feca
                | 0xcafe_babf
                | 0xbfba_feca
        ),
        _ => false,
    }
}
/// Picks the feature extractor for a file.
///
/// PE files are first offered to the .NET extractor; if that fails — and for
/// every other format — the SMDA extractor is used.
///
/// # Errors
///
/// Propagates the error of the SMDA extractor constructor. A failure of the
/// .NET extractor is not an error; it only triggers the fallback.
fn get_file_extractors<'a>(
    f: &str,
    format: FileFormat,
    data: &'a [u8],
    high_accuracy: bool,
    resolve_tailcalls: bool,
) -> Result<Box<dyn extractor::Extractor + 'a>> {
    if matches!(format, FileFormat::PE) {
        if let Ok(dotnet) = extractor::dnfile::Extractor::new(data) {
            return Ok(Box::new(dotnet));
        }
    }
    let native = extractor::smda::Extractor::new(f, high_accuracy, resolve_tailcalls, data)?;
    Ok(Box::new(native))
}