Skip to main content

memf_strings/
yara_scanner.rs

//! YARA-X memory region scanner.
//!
//! Scans raw byte buffers (process memory regions) against compiled YARA rules
//! to detect malware signatures, shellcode patterns, and indicators of
//! compromise (IoCs) in process address spaces. Unlike `yara_classifier`, which
//! scans individual strings, this module scans arbitrary binary data — critical
//! for detecting packed/encrypted payloads, injected code, and fileless malware.
8
9use std::path::Path;
10
/// A rule hit produced by scanning one memory region against YARA rules.
///
/// One value is produced per matching rule per scanned region. All offsets are
/// relative to the start of the scanned buffer, not to `region_base`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct YaraScanMatch {
    /// YARA rule identifier that matched.
    pub rule_name: String,
    /// Rule tags (e.g., "malware", "apt", "ransomware").
    pub tags: Vec<String>,
    /// Lowest buffer offset among all pattern matches of this rule
    /// (0 when the rule matched without any pattern match).
    pub match_offset: u64,
    /// Virtual address of the region that was scanned.
    pub region_base: u64,
    /// Length in bytes of the scanned region.
    pub region_size: usize,
    /// Every pattern match belonging to this rule, with its buffer offset.
    pub matched_strings: Vec<MatchedPattern>,
}

/// A single pattern match within a YARA scan result.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchedPattern {
    /// The YARA string identifier (e.g., "$mz_header", "$shellcode").
    pub identifier: String,
    /// Offset within the buffer where this pattern matched.
    pub offset: u64,
    /// The matched bytes (truncated to the first 64 bytes if longer).
    pub data: Vec<u8>,
}
38
39/// Scanner that applies compiled YARA rules to raw memory buffers.
40pub struct YaraMemoryScanner {
41    rules: yara_x::Rules,
42}
43
44impl YaraMemoryScanner {
45    /// Compile YARA rules from source text.
46    pub fn from_source(source: &str) -> crate::Result<Self> {
47        let rules = yara_x::compile(source).map_err(|e| crate::Error::Yara(e.to_string()))?;
48        Ok(Self { rules })
49    }
50
51    /// Load and compile all `.yar` / `.yara` files from a directory.
52    pub fn from_rules_dir(dir: &Path) -> crate::Result<Self> {
53        let mut compiler = yara_x::Compiler::new();
54        let mut found = false;
55
56        if dir.is_dir() {
57            for entry in std::fs::read_dir(dir)? {
58                let entry = entry?;
59                let path = entry.path();
60                if let Some(ext) = path.extension() {
61                    if ext == "yar" || ext == "yara" {
62                        let source = std::fs::read_to_string(&path)?;
63                        compiler
64                            .add_source(source.as_str())
65                            .map_err(|e| crate::Error::Yara(e.to_string()))?;
66                        found = true;
67                    }
68                }
69            }
70        }
71
72        if !found {
73            return Err(crate::Error::Yara(format!(
74                "no .yar/.yara files found in {}",
75                dir.display()
76            )));
77        }
78
79        let rules = compiler.build();
80        Ok(Self { rules })
81    }
82
83    /// Scan a raw byte buffer against the compiled rules.
84    ///
85    /// `region_base` is the virtual address of the memory region being scanned
86    /// (used for reporting, not for the scan itself).
87    pub fn scan_region(&self, data: &[u8], region_base: u64) -> crate::Result<Vec<YaraScanMatch>> {
88        if data.is_empty() {
89            return Ok(Vec::new());
90        }
91
92        let mut scanner = yara_x::Scanner::new(&self.rules);
93        let scan_results = scanner
94            .scan(data)
95            .map_err(|e| crate::Error::Yara(e.to_string()))?;
96
97        let mut matches = Vec::new();
98
99        for rule in scan_results.matching_rules() {
100            let tags: Vec<String> = rule.tags().map(|t| t.identifier().to_string()).collect();
101
102            let mut matched_strings = Vec::new();
103            let mut first_offset = u64::MAX;
104
105            for pattern in rule.patterns() {
106                for m in pattern.matches() {
107                    let offset = m.range().start as u64;
108                    if offset < first_offset {
109                        first_offset = offset;
110                    }
111                    let matched_data: Vec<u8> =
112                        data[m.range().start..m.range().end.min(m.range().start + 64)].to_vec();
113                    matched_strings.push(MatchedPattern {
114                        identifier: pattern.identifier().to_string(),
115                        offset,
116                        data: matched_data,
117                    });
118                }
119            }
120
121            if first_offset == u64::MAX {
122                first_offset = 0;
123            }
124
125            matches.push(YaraScanMatch {
126                rule_name: rule.identifier().to_string(),
127                tags,
128                match_offset: first_offset,
129                region_base,
130                region_size: data.len(),
131                matched_strings,
132            });
133        }
134
135        Ok(matches)
136    }
137
138    /// Scan multiple memory regions and aggregate results.
139    ///
140    /// Each tuple is `(region_base_vaddr, region_bytes)`.
141    pub fn scan_regions(&self, regions: &[(u64, &[u8])]) -> crate::Result<Vec<YaraScanMatch>> {
142        let mut all_matches = Vec::new();
143        for &(base, data) in regions {
144            let mut region_matches = self.scan_region(data, base)?;
145            all_matches.append(&mut region_matches);
146        }
147        Ok(all_matches)
148    }
149}
150
#[cfg(test)]
mod tests {
    //! Unit tests for [`YaraMemoryScanner`], driven by small inline rule sets.

    use super::*;

    /// Single rule matching the 4-byte `MZ\x90\x00` PE header prefix.
    const SIMPLE_RULE: &str = r#"
rule detect_mz_header {
    meta:
        description = "Detects MZ PE header"
    strings:
        $mz = { 4D 5A 90 00 }
    condition:
        $mz
}
"#;

    /// Rule carrying two tags, matching eight consecutive NOP bytes.
    const TAGGED_RULE: &str = r#"
rule shellcode_nopsled : shellcode suspicious {
    meta:
        description = "Detects NOP sled"
    strings:
        $nop = { 90 90 90 90 90 90 90 90 }
    condition:
        $nop
}
"#;

    /// Two independent rules: one for `MZ`, one for the ELF magic.
    const MULTI_RULE: &str = r"
rule detect_mz {
    strings:
        $mz = { 4D 5A }
    condition:
        $mz
}

rule detect_elf {
    strings:
        $elf = { 7F 45 4C 46 }
    condition:
        $elf
}
";

    #[test]
    fn from_source_compiles_valid_rules() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();
        // Should succeed without error — scanner is created
        let _ = scanner;
    }

    #[test]
    fn from_source_rejects_invalid_rules() {
        let result = YaraMemoryScanner::from_source("this is not valid yara");
        assert!(result.is_err());
    }

    #[test]
    fn scan_region_detects_mz_header() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();

        // Buffer with MZ header at offset 0
        let mut data = vec![0u8; 256];
        data[0] = 0x4D; // M
        data[1] = 0x5A; // Z
        data[2] = 0x90;
        data[3] = 0x00;

        let matches = scanner.scan_region(&data, 0x7FFE_0000).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "detect_mz_header");
        assert_eq!(matches[0].region_base, 0x7FFE_0000);
        assert_eq!(matches[0].region_size, 256);
        assert!(!matches[0].matched_strings.is_empty());
        assert_eq!(matches[0].matched_strings[0].identifier, "$mz");
        assert_eq!(matches[0].matched_strings[0].offset, 0);
    }

    #[test]
    fn match_offset_reports_earliest_pattern_hit() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();

        // Two MZ headers, neither at offset 0, in an otherwise non-matching buffer
        let mut data = vec![0xCCu8; 256];
        data[0x20..0x24].copy_from_slice(&[0x4D, 0x5A, 0x90, 0x00]);
        data[0x80..0x84].copy_from_slice(&[0x4D, 0x5A, 0x90, 0x00]);

        let matches = scanner.scan_region(&data, 0x1000).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].matched_strings.len(), 2);
        assert_eq!(matches[0].match_offset, 0x20);
    }

    #[test]
    fn scan_region_no_match() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();

        // Buffer with no MZ header
        let data = vec![0xCCu8; 256];
        let matches = scanner.scan_region(&data, 0x1000).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn scan_region_with_tags() {
        let scanner = YaraMemoryScanner::from_source(TAGGED_RULE).unwrap();

        // Buffer with NOP sled
        let mut data = vec![0u8; 256];
        for slot in data.iter_mut().take(16) {
            *slot = 0x90; // NOP sled
        }

        let matches = scanner.scan_region(&data, 0x4000).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "shellcode_nopsled");
        assert!(matches[0].tags.contains(&"shellcode".to_string()));
        assert!(matches[0].tags.contains(&"suspicious".to_string()));
    }

    #[test]
    fn scan_regions_aggregates_results() {
        let scanner = YaraMemoryScanner::from_source(MULTI_RULE).unwrap();

        // Region 1: MZ header
        let mut region1 = vec![0u8; 128];
        region1[0] = 0x4D;
        region1[1] = 0x5A;

        // Region 2: ELF header
        let mut region2 = vec![0u8; 128];
        region2[0] = 0x7F;
        region2[1] = 0x45; // E
        region2[2] = 0x4C; // L
        region2[3] = 0x46; // F

        let regions: Vec<(u64, &[u8])> = vec![(0x1000, &region1), (0x2000, &region2)];
        let matches = scanner.scan_regions(&regions).unwrap();

        // Should find detect_mz in region1 and detect_elf in region2
        assert_eq!(matches.len(), 2);
        let rule_names: Vec<&str> = matches.iter().map(|m| m.rule_name.as_str()).collect();
        assert!(rule_names.contains(&"detect_mz"));
        assert!(rule_names.contains(&"detect_elf"));

        // Verify correct region_base assignment
        let mz_match = matches.iter().find(|m| m.rule_name == "detect_mz").unwrap();
        assert_eq!(mz_match.region_base, 0x1000);
        let elf_match = matches
            .iter()
            .find(|m| m.rule_name == "detect_elf")
            .unwrap();
        assert_eq!(elf_match.region_base, 0x2000);
    }

    #[test]
    fn scan_empty_buffer_returns_no_matches() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();
        let matches = scanner.scan_region(&[], 0x0).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn matched_pattern_data_truncated_to_64_bytes() {
        // The pattern must be longer than the 64-byte cap, otherwise the test
        // passes even when no truncation happens.
        const PATTERN_LEN: usize = 80;

        let hex_body = vec!["00"; PATTERN_LEN].join(" ");
        let rule = format!(
            "rule long_match {{\n    strings:\n        $zeros = {{ {hex_body} }}\n    condition:\n        $zeros\n}}\n"
        );

        let scanner = YaraMemoryScanner::from_source(&rule).unwrap();
        let data = vec![0u8; 256];
        let matches = scanner.scan_region(&data, 0x5000).unwrap();
        assert_eq!(matches.len(), 1);
        assert!(!matches[0].matched_strings.is_empty());
        // Every match spans 80 bytes, so the stored data must be cut to exactly 64
        for mp in &matches[0].matched_strings {
            assert_eq!(mp.data.len(), 64);
        }
    }
}
316}