memf_strings/
yara_scanner.rs1use std::path::Path;
10
11#[derive(Debug, Clone)]
13pub struct YaraScanMatch {
14 pub rule_name: String,
16 pub tags: Vec<String>,
18 pub match_offset: u64,
20 pub region_base: u64,
22 pub region_size: usize,
24 pub matched_strings: Vec<MatchedPattern>,
26}
27
/// A single occurrence of a rule pattern inside a scanned buffer.
#[derive(Clone, Debug)]
pub struct MatchedPattern {
    /// Identifier of the pattern as written in the rule (for example `$mz`).
    pub identifier: String,
    /// Start offset of the occurrence, relative to the beginning of the
    /// scanned buffer.
    pub offset: u64,
    /// Copy of the matched bytes; the scanner keeps at most the first
    /// 64 bytes of each occurrence.
    pub data: Vec<u8>,
}
38
39pub struct YaraMemoryScanner {
41 rules: yara_x::Rules,
42}
43
44impl YaraMemoryScanner {
45 pub fn from_source(source: &str) -> crate::Result<Self> {
47 let rules = yara_x::compile(source).map_err(|e| crate::Error::Yara(e.to_string()))?;
48 Ok(Self { rules })
49 }
50
51 pub fn from_rules_dir(dir: &Path) -> crate::Result<Self> {
53 let mut compiler = yara_x::Compiler::new();
54 let mut found = false;
55
56 if dir.is_dir() {
57 for entry in std::fs::read_dir(dir)? {
58 let entry = entry?;
59 let path = entry.path();
60 if let Some(ext) = path.extension() {
61 if ext == "yar" || ext == "yara" {
62 let source = std::fs::read_to_string(&path)?;
63 compiler
64 .add_source(source.as_str())
65 .map_err(|e| crate::Error::Yara(e.to_string()))?;
66 found = true;
67 }
68 }
69 }
70 }
71
72 if !found {
73 return Err(crate::Error::Yara(format!(
74 "no .yar/.yara files found in {}",
75 dir.display()
76 )));
77 }
78
79 let rules = compiler.build();
80 Ok(Self { rules })
81 }
82
83 pub fn scan_region(&self, data: &[u8], region_base: u64) -> crate::Result<Vec<YaraScanMatch>> {
88 if data.is_empty() {
89 return Ok(Vec::new());
90 }
91
92 let mut scanner = yara_x::Scanner::new(&self.rules);
93 let scan_results = scanner
94 .scan(data)
95 .map_err(|e| crate::Error::Yara(e.to_string()))?;
96
97 let mut matches = Vec::new();
98
99 for rule in scan_results.matching_rules() {
100 let tags: Vec<String> = rule.tags().map(|t| t.identifier().to_string()).collect();
101
102 let mut matched_strings = Vec::new();
103 let mut first_offset = u64::MAX;
104
105 for pattern in rule.patterns() {
106 for m in pattern.matches() {
107 let offset = m.range().start as u64;
108 if offset < first_offset {
109 first_offset = offset;
110 }
111 let matched_data: Vec<u8> =
112 data[m.range().start..m.range().end.min(m.range().start + 64)].to_vec();
113 matched_strings.push(MatchedPattern {
114 identifier: pattern.identifier().to_string(),
115 offset,
116 data: matched_data,
117 });
118 }
119 }
120
121 if first_offset == u64::MAX {
122 first_offset = 0;
123 }
124
125 matches.push(YaraScanMatch {
126 rule_name: rule.identifier().to_string(),
127 tags,
128 match_offset: first_offset,
129 region_base,
130 region_size: data.len(),
131 matched_strings,
132 });
133 }
134
135 Ok(matches)
136 }
137
138 pub fn scan_regions(&self, regions: &[(u64, &[u8])]) -> crate::Result<Vec<YaraScanMatch>> {
142 let mut all_matches = Vec::new();
143 for &(base, data) in regions {
144 let mut region_matches = self.scan_region(data, base)?;
145 all_matches.append(&mut region_matches);
146 }
147 Ok(all_matches)
148 }
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Single rule matching the first four bytes of a typical PE header.
    const SIMPLE_RULE: &str = r#"
rule detect_mz_header {
    meta:
        description = "Detects MZ PE header"
    strings:
        $mz = { 4D 5A 90 00 }
    condition:
        $mz
}
"#;

    /// Rule carrying two tags, matching a run of eight NOP bytes.
    const TAGGED_RULE: &str = r#"
rule shellcode_nopsled : shellcode suspicious {
    meta:
        description = "Detects NOP sled"
    strings:
        $nop = { 90 90 90 90 90 90 90 90 }
    condition:
        $nop
}
"#;

    /// Two independent rules, used to check aggregation across regions.
    const MULTI_RULE: &str = r"
rule detect_mz {
    strings:
        $mz = { 4D 5A }
    condition:
        $mz
}

rule detect_elf {
    strings:
        $elf = { 7F 45 4C 46 }
    condition:
        $elf
}
";

    #[test]
    fn from_source_compiles_valid_rules() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();
        let _ = scanner;
    }

    #[test]
    fn from_source_rejects_invalid_rules() {
        let result = YaraMemoryScanner::from_source("this is not valid yara");
        assert!(result.is_err());
    }

    #[test]
    fn scan_region_detects_mz_header() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();

        let mut data = vec![0u8; 256];
        data[..4].copy_from_slice(&[0x4D, 0x5A, 0x90, 0x00]);

        let matches = scanner.scan_region(&data, 0x7FFE_0000).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "detect_mz_header");
        assert_eq!(matches[0].region_base, 0x7FFE_0000);
        assert_eq!(matches[0].region_size, 256);
        assert!(!matches[0].matched_strings.is_empty());
        assert_eq!(matches[0].matched_strings[0].identifier, "$mz");
        assert_eq!(matches[0].matched_strings[0].offset, 0);
    }

    #[test]
    fn scan_region_no_match() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();

        let data = vec![0xCCu8; 256];
        let matches = scanner.scan_region(&data, 0x1000).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn scan_region_with_tags() {
        let scanner = YaraMemoryScanner::from_source(TAGGED_RULE).unwrap();

        let mut data = vec![0u8; 256];
        data[..16].fill(0x90);

        let matches = scanner.scan_region(&data, 0x4000).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].rule_name, "shellcode_nopsled");
        assert!(matches[0].tags.contains(&"shellcode".to_string()));
        assert!(matches[0].tags.contains(&"suspicious".to_string()));
    }

    #[test]
    fn scan_regions_aggregates_results() {
        let scanner = YaraMemoryScanner::from_source(MULTI_RULE).unwrap();

        let mut region1 = vec![0u8; 128];
        region1[..2].copy_from_slice(&[0x4D, 0x5A]);

        let mut region2 = vec![0u8; 128];
        region2[..4].copy_from_slice(&[0x7F, 0x45, 0x4C, 0x46]);

        let regions: Vec<(u64, &[u8])> =
            vec![(0x1000, region1.as_slice()), (0x2000, region2.as_slice())];
        let matches = scanner.scan_regions(&regions).unwrap();

        assert_eq!(matches.len(), 2);
        let rule_names: Vec<&str> = matches.iter().map(|m| m.rule_name.as_str()).collect();
        assert!(rule_names.contains(&"detect_mz"));
        assert!(rule_names.contains(&"detect_elf"));

        let mz_match = matches.iter().find(|m| m.rule_name == "detect_mz").unwrap();
        assert_eq!(mz_match.region_base, 0x1000);
        let elf_match = matches
            .iter()
            .find(|m| m.rule_name == "detect_elf")
            .unwrap();
        assert_eq!(elf_match.region_base, 0x2000);
    }

    #[test]
    fn scan_empty_buffer_returns_no_matches() {
        let scanner = YaraMemoryScanner::from_source(SIMPLE_RULE).unwrap();
        let matches = scanner.scan_region(&[], 0x0).unwrap();
        assert!(matches.is_empty());
    }

    #[test]
    fn matched_pattern_data_truncated_to_64_bytes() {
        let rule = r"
rule long_match {
    strings:
        $zeros = { 00 00 00 00 00 00 00 00 00 00 }
    condition:
        $zeros
}
";
        let scanner = YaraMemoryScanner::from_source(rule).unwrap();
        let data = vec![0u8; 256];
        let matches = scanner.scan_region(&data, 0x5000).unwrap();
        assert_eq!(matches.len(), 1);
        for mp in &matches[0].matched_strings {
            assert!(mp.data.len() <= 64);
        }
    }

    /// Uses a pattern longer than the cap so truncation is actually exercised.
    #[test]
    fn matched_pattern_data_capped_for_long_matches() {
        let rule = format!(
            "rule very_long_match {{\n    strings:\n        $zeros = {{ {}}}\n    condition:\n        $zeros\n}}\n",
            "00 ".repeat(80)
        );
        let scanner = YaraMemoryScanner::from_source(&rule).unwrap();
        let data = vec![0u8; 256];
        let matches = scanner.scan_region(&data, 0x5000).unwrap();
        assert_eq!(matches.len(), 1);
        assert!(!matches[0].matched_strings.is_empty());
        for mp in &matches[0].matched_strings {
            assert_eq!(mp.data.len(), 64);
        }
    }
}