use std::path::Path;
/// One YARA rule that matched a scanned buffer.
///
/// The field descriptions below reflect how `YaraMemoryScanner::scan_region`
/// fills the struct; code constructing it by hand may use other conventions.
#[derive(Clone, Debug)]
pub struct YaraScanMatch {
    /// Identifier of the rule that matched.
    pub rule_name: String,
    /// Identifiers of the tags declared on the rule.
    pub tags: Vec<String>,
    /// Smallest start offset among all pattern matches of the rule, counted
    /// from the start of the scanned buffer (not from `region_base`).
    /// It is `0` when the rule matched without any pattern match.
    pub match_offset: u64,
    /// The `region_base` value the caller passed to the scan, stored unchanged
    /// (presumably the address the buffer was read from — confirm with callers).
    pub region_base: u64,
    /// Length in bytes of the scanned buffer.
    pub region_size: usize,
    /// One entry per individual pattern match recorded for the rule.
    pub matched_strings: Vec<MatchedPattern>,
}
/// A single occurrence of a rule pattern inside a scanned buffer.
#[derive(Clone, Debug)]
pub struct MatchedPattern {
    /// Identifier of the pattern as reported by the YARA engine
    /// (e.g. `$mz` for a pattern declared as `$mz = ...`).
    pub identifier: String,
    /// Start offset of the match, counted from the start of the scanned buffer.
    pub offset: u64,
    /// Copy of the matched bytes; the scanner keeps at most the first 64 bytes
    /// of each match.
    pub data: Vec<u8>,
}
/// Scans in-memory byte buffers against a compiled set of YARA rules.
///
/// The rules are compiled once when the scanner is constructed and are then
/// used by every scan call.
pub struct YaraMemoryScanner {
    /// Compiled rule set evaluated by each scan.
    rules: yara_x::Rules,
}
impl YaraMemoryScanner {
pub fn from_source(source: &str) -> crate::Result<Self> {
let rules = yara_x::compile(source).map_err(|e| crate::Error::Yara(e.to_string()))?;
Ok(Self { rules })
}
pub fn from_rules_dir(dir: &Path) -> crate::Result<Self> {
let mut compiler = yara_x::Compiler::new();
let mut found = false;
if dir.is_dir() {
for entry in std::fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if let Some(ext) = path.extension() {
if ext == "yar" || ext == "yara" {
let source = std::fs::read_to_string(&path)?;
compiler
.add_source(source.as_str())
.map_err(|e| crate::Error::Yara(e.to_string()))?;
found = true;
}
}
}
}
if !found {
return Err(crate::Error::Yara(format!(
"no .yar/.yara files found in {}",
dir.display()
)));
}
let rules = compiler.build();
Ok(Self { rules })
}
pub fn scan_region(&self, data: &[u8], region_base: u64) -> crate::Result<Vec<YaraScanMatch>> {
if data.is_empty() {
return Ok(Vec::new());
}
let mut scanner = yara_x::Scanner::new(&self.rules);
let scan_results = scanner
.scan(data)
.map_err(|e| crate::Error::Yara(e.to_string()))?;
let mut matches = Vec::new();
for rule in scan_results.matching_rules() {
let tags: Vec<String> = rule.tags().map(|t| t.identifier().to_string()).collect();
let mut matched_strings = Vec::new();
let mut first_offset = u64::MAX;
for pattern in rule.patterns() {
for m in pattern.matches() {
let offset = m.range().start as u64;
if offset < first_offset {
first_offset = offset;
}
let matched_data: Vec<u8> =
data[m.range().start..m.range().end.min(m.range().start + 64)].to_vec();
matched_strings.push(MatchedPattern {
identifier: pattern.identifier().to_string(),
offset,
data: matched_data,
});
}
}
if first_offset == u64::MAX {
first_offset = 0;
}
matches.push(YaraScanMatch {
rule_name: rule.identifier().to_string(),
tags,
match_offset: first_offset,
region_base,
region_size: data.len(),
matched_strings,
});
}
Ok(matches)
}
pub fn scan_regions(&self, regions: &[(u64, &[u8])]) -> crate::Result<Vec<YaraScanMatch>> {
let mut all_matches = Vec::new();
for &(base, data) in regions {
let mut region_matches = self.scan_region(data, base)?;
all_matches.append(&mut region_matches);
}
Ok(all_matches)
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for `YaraMemoryScanner`: rule compilation, single-region
    //! scans, multi-region aggregation and match-data truncation.

    use super::*;

    /// One untagged rule matching the byte sequence `4D 5A 90 00`.
    const SIMPLE_RULE: &str = r#"
rule detect_mz_header {
meta:
description = "Detects MZ PE header"
strings:
$mz = { 4D 5A 90 00 }
condition:
$mz
}
"#;

    /// One rule carrying the tags `shellcode` and `suspicious`, matching eight `90` bytes.
    const TAGGED_RULE: &str = r#"
rule shellcode_nopsled : shellcode suspicious {
meta:
description = "Detects NOP sled"
strings:
$nop = { 90 90 90 90 90 90 90 90 }
condition:
$nop
}
"#;

    /// Two independent rules, so different regions can trigger different rules.
    const MULTI_RULE: &str = r"
rule detect_mz {
strings:
$mz = { 4D 5A }
condition:
$mz
}
rule detect_elf {
strings:
$elf = { 7F 45 4C 46 }
condition:
$elf
}
";

    #[test]
    fn from_source_compiles_valid_rules() {
        assert!(YaraMemoryScanner::from_source(SIMPLE_RULE).is_ok());
    }

    #[test]
    fn from_source_rejects_invalid_rules() {
        let result = YaraMemoryScanner::from_source("this is not valid yara");
        assert!(result.is_err());
    }

    #[test]
    fn scan_region_detects_mz_header() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();
        let mut data = vec![0u8; 256];
        data[..4].copy_from_slice(&[0x4D, 0x5A, 0x90, 0x00]);

        let matches = scanner.scan_region(&data, 0x7FFE_0000).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "detect_mz_header");
        assert_eq!(matches[0].region_base, 0x7FFE_0000);
        assert_eq!(matches[0].region_size, 256);
        assert!(!matches[0].matched_strings.is_empty());
        assert_eq!(matches[0].matched_strings[0].identifier, "$mz");
        assert_eq!(matches[0].matched_strings[0].offset, 0);
    }

    #[test]
    fn scan_region_no_match() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();
        let data = vec![0xCCu8; 256];

        let matches = scanner.scan_region(&data, 0x1000).unwrap();

        assert!(matches.is_empty());
    }

    #[test]
    fn scan_region_with_tags() {
        let scanner = YaraMemoryScanner::from_source(TAGGED_RULE).unwrap();
        let mut data = vec![0u8; 256];
        data[..16].fill(0x90);

        let matches = scanner.scan_region(&data, 0x4000).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "shellcode_nopsled");
        assert!(matches[0].tags.contains(&"shellcode".to_string()));
        assert!(matches[0].tags.contains(&"suspicious".to_string()));
    }

    #[test]
    fn scan_regions_aggregates_results() {
        let scanner = YaraMemoryScanner::from_source(MULTI_RULE).unwrap();
        let mut region1 = vec![0u8; 128];
        region1[..2].copy_from_slice(&[0x4D, 0x5A]);
        let mut region2 = vec![0u8; 128];
        region2[..4].copy_from_slice(&[0x7F, 0x45, 0x4C, 0x46]);

        let regions: Vec<(u64, &[u8])> =
            vec![(0x1000, region1.as_slice()), (0x2000, region2.as_slice())];
        let matches = scanner.scan_regions(&regions).unwrap();

        assert_eq!(matches.len(), 2);
        let rule_names: Vec<&str> = matches.iter().map(|m| m.rule_name.as_str()).collect();
        assert!(rule_names.contains(&"detect_mz"));
        assert!(rule_names.contains(&"detect_elf"));

        let mz_match = matches.iter().find(|m| m.rule_name == "detect_mz").unwrap();
        assert_eq!(mz_match.region_base, 0x1000);
        let elf_match = matches
            .iter()
            .find(|m| m.rule_name == "detect_elf")
            .unwrap();
        assert_eq!(elf_match.region_base, 0x2000);
    }

    #[test]
    fn scan_empty_buffer_returns_no_matches() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();

        let matches = scanner.scan_region(&[], 0x0).unwrap();

        assert!(matches.is_empty());
    }

    #[test]
    fn matched_pattern_data_truncated_to_64_bytes() {
        // An 80-byte hex pattern (values 0x10..=0x5F) so the match is longer
        // than the 64 bytes the scanner keeps per match.
        let hex_bytes: Vec<String> = (0x10u8..0x60).map(|b| format!("{:02X}", b)).collect();
        let rule = format!(
            "rule long_match {{\nstrings:\n$seq = {{ {} }}\ncondition:\n$seq\n}}\n",
            hex_bytes.join(" ")
        );
        let scanner = YaraMemoryScanner::from_source(&rule).unwrap();
        // Byte `i` holds the value `i`, so the pattern occurs exactly once, at offset 0x10.
        let data: Vec<u8> = (0..=255u8).collect();

        let matches = scanner.scan_region(&data, 0x5000).unwrap();

        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].match_offset, 0x10);
        assert_eq!(matches[0].matched_strings.len(), 1);
        let captured = &matches[0].matched_strings[0];
        assert_eq!(captured.offset, 0x10);
        assert_eq!(captured.data.len(), 64);
        assert_eq!(captured.data.as_slice(), &data[0x10..0x50]);
    }
}