yara_x/scanner/mod.rs
/*! This module implements the YARA scanner.

The scanner takes the rules produced by the compiler and scans data with them.
*/
5use std::collections::{hash_map, BTreeMap, HashMap};
6use std::fmt::{Debug, Formatter};
7use std::fs;
8use std::io::Read;
9use std::mem::transmute;
10use std::ops::Range;
11use std::path::{Path, PathBuf};
12use std::pin::Pin;
13use std::slice::Iter;
14use std::sync::atomic::AtomicU64;
15use std::sync::Once;
16use std::time::Duration;
17
18use bitvec::prelude::*;
19use memmap2::{Mmap, MmapOptions};
20use protobuf::{CodedInputStream, MessageDyn};
21use thiserror::Error;
22use wasmtime::Store;
23
24use crate::compiler::{RuleId, Rules};
25use crate::models::Rule;
26use crate::modules::{Module, ModuleError, BUILTIN_MODULES};
27use crate::scanner::context::create_wasm_store_and_ctx;
28use crate::types::{Struct, TypeValue};
29use crate::variables::VariableError;
30use crate::wasm::MATCHING_RULES_BITMAP_BASE;
31use crate::{modules, Variable};
32
33pub(crate) use crate::scanner::context::RuntimeObject;
34pub(crate) use crate::scanner::context::RuntimeObjectHandle;
35pub(crate) use crate::scanner::context::ScanContext;
36pub(crate) use crate::scanner::context::ScanState;
37pub(crate) use crate::scanner::matches::Match;
38
39mod context;
40mod matches;
41
42pub mod blocks;
43
44#[cfg(test)]
45mod tests;
46
/// Error returned when a scan operation fails.
///
/// The enum is marked `#[non_exhaustive]`, so code outside this crate that
/// matches on it needs a wildcard arm.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ScanError {
    /// The scan was aborted after the timeout period.
    #[error("timeout")]
    Timeout,
    /// Could not open the scanned file, or could not read its contents into
    /// memory after opening it.
    #[error("can not open `{path}`: {err}")]
    OpenError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Underlying I/O error.
        err: std::io::Error,
    },
    /// Could not map the scanned file into memory.
    #[error("can not map `{path}`: {err}")]
    MapError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Underlying I/O error.
        err: std::io::Error,
    },
    /// Could not deserialize the protobuf message for some YARA module.
    #[error("can not deserialize protobuf message for YARA module `{module}`: {err}")]
    ProtoError {
        /// Module name, as passed by the caller.
        module: String,
        /// Error reported by the protobuf parser.
        err: protobuf::Error,
    },
    /// The module is unknown, i.e. no built-in module matches the given
    /// module name or protobuf message name.
    #[error("unknown module `{module}`")]
    UnknownModule {
        /// Module name.
        module: String,
    },
    /// Some module produced an error when it was invoked.
    #[error("error in module `{module}`: {err}")]
    ModuleError {
        /// Module name.
        module: String,
        /// Error returned by the module's main function.
        err: ModuleError,
    },
}
93
/// Global counter that gets incremented once per second by a dedicated
/// thread.
///
/// This counter is used for determining when a scan operation has timed out.
static HEARTBEAT_COUNTER: AtomicU64 = AtomicU64::new(0);
98
/// One-time initialization guard used for spawning the thread that
/// increments `HEARTBEAT_COUNTER`.
static INIT_HEARTBEAT: Once = Once::new();
101
/// Represents the data being scanned.
///
/// The scanned data can be backed by a slice owned by someone else, or a
/// vector or memory-mapped file owned by `ScannedData` itself. In every case
/// the bytes are reachable through the `AsRef<[u8]>` implementation.
pub enum ScannedData<'d> {
    /// Data borrowed from the caller for the lifetime `'d`.
    Slice(&'d [u8]),
    /// Data held in an owned, in-memory buffer.
    Vec(Vec<u8>),
    /// Data backed by a memory-mapped file.
    Mmap(Mmap),
}
111
112impl AsRef<[u8]> for ScannedData<'_> {
113 fn as_ref(&self) -> &[u8] {
114 match self {
115 ScannedData::Slice(s) => s,
116 ScannedData::Vec(v) => v.as_ref(),
117 ScannedData::Mmap(m) => m.as_ref(),
118 }
119 }
120}
121
122impl<'d> TryInto<ScannedData<'d>> for &'d [u8] {
123 type Error = ScanError;
124 fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
125 Ok(ScannedData::Slice(self))
126 }
127}
128
129impl<'d, const N: usize> TryInto<ScannedData<'d>> for &'d [u8; N] {
130 type Error = ScanError;
131 fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
132 Ok(ScannedData::Slice(self))
133 }
134}
135
/// Contains information about the time spent on a rule.
///
/// Only available when the `rules-profiling` feature is enabled. The string
/// fields are borrowed for the lifetime `'r`.
#[cfg(feature = "rules-profiling")]
pub struct ProfilingData<'r> {
    /// Rule namespace.
    pub namespace: &'r str,
    /// Rule name.
    pub rule: &'r str,
    /// Time spent executing the rule's condition.
    pub condition_exec_time: Duration,
    /// Time spent matching the rule's patterns.
    pub pattern_matching_time: Duration,
}
148
/// Optional, additional inputs for a scan operation.
///
/// Values are built up with chained calls, each one consuming and returning
/// the options.
#[derive(Debug, Default)]
pub struct ScanOptions<'a> {
    /// Metadata blobs, keyed by YARA module name.
    module_metadata: HashMap<&'a str, &'a [u8]>,
}

impl<'a> ScanOptions<'a> {
    /// Creates an empty set of options, carrying no additional information
    /// for the scan operation.
    ///
    /// Use the other methods to add information.
    pub fn new() -> Self {
        Self::default()
    }

    /// Associates `metadata` with the YARA module named `module_name`.
    ///
    /// Setting metadata twice for the same module keeps only the most
    /// recent value.
    pub fn set_module_metadata(
        self,
        module_name: &'a str,
        metadata: &'a [u8],
    ) -> Self {
        let mut module_metadata = self.module_metadata;
        module_metadata.insert(module_name, metadata);
        Self { module_metadata }
    }
}
174
/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled [`Rules`] and scans data with those
/// rules. The same scanner can be used for scanning multiple files or
/// in-memory data sequentially, but you need multiple scanners for scanning in
/// parallel.
pub struct Scanner<'r> {
    /// The compiled rules this scanner was created with.
    _rules: &'r Rules,
    /// Pinned, heap-allocated WASM store that owns the scan context. The
    /// context is declared with `'static` lifetimes here and is re-typed by
    /// the private context accessors.
    wasm_store: Pin<Box<Store<ScanContext<'static, 'static>>>>,
    /// Whether files may be memory-mapped instead of read into a buffer.
    use_mmap: bool,
}
186
187impl<'r> Scanner<'r> {
188 /// Creates a new scanner.
189 pub fn new(rules: &'r Rules) -> Self {
190 let wasm_store = create_wasm_store_and_ctx(rules);
191 Self { _rules: rules, wasm_store, use_mmap: true }
192 }
193
194 /// Sets a timeout for scan operations.
195 ///
196 /// The scan functions will return an [ScanError::Timeout] once the
197 /// provided timeout duration has elapsed. The scanner will make every
198 /// effort to stop promptly after the designated timeout duration. However,
199 /// in some cases, particularly with rules containing only a few patterns,
200 /// the scanner could potentially continue running for a longer period than
201 /// the specified timeout.
202 pub fn set_timeout(&mut self, timeout: Duration) -> &mut Self {
203 self.scan_context_mut().set_timeout(timeout);
204 self
205 }
206
207 /// Sets the maximum number of matches per pattern.
208 ///
209 /// When some pattern reaches the maximum number of patterns it won't
210 /// produce more matches.
211 pub fn max_matches_per_pattern(&mut self, n: usize) -> &mut Self {
212 self.scan_context_mut().pattern_matches.max_matches_per_pattern(n);
213 self
214 }
215
    /// Specifies whether [`Scanner::scan_file`] and
    /// [`Scanner::scan_file_with_options`] may use memory-mapped files to
    /// read input.
    ///
    /// By default, the scanner uses memory mapping for very large files
    /// (those bigger than 500,000,000 bytes), as this is typically faster
    /// than copying file contents into memory. However, this approach has a
    /// drawback: if another process truncates the file during scanning, a
    /// `SIGBUS` signal may occur.
    ///
    /// Passing `false` disables memory mapping and forces the scanner to
    /// always read files into an in-memory buffer instead. This method is
    /// slower, but safer. Passing `true` re-enables the default behaviour.
    pub fn use_mmap(&mut self, yes: bool) -> &mut Self {
        self.use_mmap = yes;
        self
    }
231
232 /// Sets a callback that is invoked every time a YARA rule calls the
233 /// `console` module.
234 ///
235 /// The `callback` function is invoked with a string representing the
236 /// message being logged. The function can print the message to stdout,
237 /// append it to a file, etc. If no callback is set these messages are
238 /// ignored.
239 pub fn console_log<F>(&mut self, callback: F) -> &mut Self
240 where
241 F: FnMut(String) + 'r,
242 {
243 self.scan_context_mut().console_log = Some(Box::new(callback));
244 self
245 }
246
247 /// Scans in-memory data.
248 pub fn scan<'a>(
249 &'a mut self,
250 data: &'a [u8],
251 ) -> Result<ScanResults<'a, 'r>, ScanError> {
252 self.scan_impl(data.try_into()?, None)
253 }
254
255 /// Scans a file.
256 pub fn scan_file<'a, P>(
257 &'a mut self,
258 target: P,
259 ) -> Result<ScanResults<'a, 'r>, ScanError>
260 where
261 P: AsRef<Path>,
262 {
263 self.scan_impl(self.load_file(target.as_ref())?, None)
264 }
265
266 /// Like [`Scanner::scan`], but allows to specify additional scan options.
267 pub fn scan_with_options<'a, 'opts>(
268 &'a mut self,
269 data: &'a [u8],
270 options: ScanOptions<'opts>,
271 ) -> Result<ScanResults<'a, 'r>, ScanError> {
272 self.scan_impl(ScannedData::Slice(data), Some(options))
273 }
274
275 /// Like [`Scanner::scan_file`], but allows to specify additional scan
276 /// options.
277 pub fn scan_file_with_options<'opts, P>(
278 &mut self,
279 target: P,
280 options: ScanOptions<'opts>,
281 ) -> Result<ScanResults<'_, 'r>, ScanError>
282 where
283 P: AsRef<Path>,
284 {
285 self.scan_impl(self.load_file(target.as_ref())?, Some(options))
286 }
287
288 /// Sets the value of a global variable.
289 ///
290 /// The variable must has been previously defined by calling
291 /// [`crate::Compiler::define_global`], and the type it has during the
292 /// definition must match the type of the new value (`T`).
293 ///
294 /// The variable will retain the new value in subsequent scans, unless this
295 /// function is called again for setting a new value.
296 pub fn set_global<T: TryInto<Variable>>(
297 &mut self,
298 ident: &str,
299 value: T,
300 ) -> Result<&mut Self, VariableError>
301 where
302 VariableError: From<<T as TryInto<Variable>>::Error>,
303 {
304 self.scan_context_mut().set_global(ident, value)?;
305 Ok(self)
306 }
307
308 /// Sets the output data for a YARA module.
309 ///
310 /// Each YARA module generates an output consisting of a data structure that
311 /// contains information about the scanned file. This data structure is
312 /// represented by a Protocol Buffer message. Typically, you won't need to
313 /// provide this data yourself, as the YARA module automatically generates
314 /// different outputs for each file it scans.
315 ///
316 /// However, there are two scenarios in which you may want to provide the
317 /// output for a module yourself:
318 ///
319 /// 1) When the module does not produce any output on its own.
320 /// 2) When you already know the output of the module for the upcoming file
321 /// to be scanned, and you prefer to reuse this data instead of generating
322 /// it again.
323 ///
324 /// Case 1) applies to certain modules lacking a main function, thus
325 /// incapable of producing any output on their own. For such modules, you
326 /// must set the output before scanning the associated data. Since the
327 /// module's output typically varies with each scanned file, you need to
328 /// call [`Scanner::set_module_output`] prior to each invocation of
329 /// [`Scanner::scan`]. Once [`Scanner::scan`] is executed, the module's
330 /// output is consumed and will be empty unless set again before the
331 /// subsequent call.
332 ///
333 /// Case 2) applies when you have previously stored the module's output for
334 /// certain scanned data. In such cases, when rescanning the data, you can
335 /// utilize this function to supply the module's output, thereby preventing
336 /// redundant computation by the module. This optimization enhances
337 /// performance by eliminating the need for the module to reparse the
338 /// scanned data.
339 ///
340 /// <br>
341 ///
342 /// The `data` argument must be a Protocol Buffer message corresponding
343 /// to any of the existing YARA modules.
344 pub fn set_module_output(
345 &mut self,
346 data: Box<dyn MessageDyn>,
347 ) -> Result<&mut Self, ScanError> {
348 let descriptor = data.descriptor_dyn();
349 let full_name = descriptor.full_name();
350
351 // Check if the protobuf message passed to this function corresponds
352 // with any of the existing modules.
353 if !BUILTIN_MODULES
354 .iter()
355 .any(|m| m.1.root_struct_descriptor.full_name() == full_name)
356 {
357 return Err(ScanError::UnknownModule {
358 module: full_name.to_string(),
359 });
360 }
361
362 self.scan_context_mut()
363 .user_provided_module_outputs
364 .insert(full_name.to_string(), data);
365
366 Ok(self)
367 }
368
369 /// Similar to [`Scanner::set_module_output`], but receives a module name
370 /// and the protobuf message as raw data.
371 ///
372 /// `name` can be either the YARA module name (i.e: "pe", "elf", "dotnet",
373 /// etc.) or the fully-qualified name for the protobuf message associated
374 /// to the module (i.e: "pe.PE", "elf.ELF", "dotnet.Dotnet", etc.).
375 pub fn set_module_output_raw(
376 &mut self,
377 name: &str,
378 data: &[u8],
379 ) -> Result<&mut Self, ScanError> {
380 // Try to find the module by name first, if not found, then try
381 // to find a module where the fully-qualified name for its protobuf
382 // message matches the `name` arguments.
383 let descriptor = if let Some(module) = BUILTIN_MODULES.get(name) {
384 Some(&module.root_struct_descriptor)
385 } else {
386 BUILTIN_MODULES.values().find_map(|module| {
387 if module.root_struct_descriptor.full_name() == name {
388 Some(&module.root_struct_descriptor)
389 } else {
390 None
391 }
392 })
393 };
394
395 if descriptor.is_none() {
396 return Err(ScanError::UnknownModule { module: name.to_string() });
397 }
398
399 let mut is = CodedInputStream::from_bytes(data);
400
401 // Default recursion limit is 100, that's not enough for some deeply
402 // nested structures like the process tree in the `vt` module.
403 is.set_recursion_limit(500);
404
405 self.set_module_output(
406 descriptor.unwrap().parse_from(&mut is).map_err(|err| {
407 ScanError::ProtoError { module: name.to_string(), err }
408 })?,
409 )
410 }
411
412 /// Returns profiling data for the slowest N rules.
413 ///
414 /// The profiling data reflects the cumulative execution time of each rule
415 /// across all scanned files. This information is useful for identifying
416 /// performance bottlenecks. To reset the profiling data and start fresh
417 /// for subsequent scans, use [`Scanner::clear_profiling_data`].
418 #[cfg(feature = "rules-profiling")]
419 pub fn slowest_rules(&self, n: usize) -> Vec<ProfilingData<'_>> {
420 self.scan_context().slowest_rules(n)
421 }
422
423 /// Clears all accumulated profiling data.
424 ///
425 /// This method resets the profiling data collected during rule execution
426 /// across scanned files. Use this to start a new profiling session, ensuring
427 /// the results reflect only the data gathered after this method is called.
428 #[cfg(feature = "rules-profiling")]
429 pub fn clear_profiling_data(&mut self) {
430 self.scan_context_mut().clear_profiling_data()
431 }
432}
433
434impl<'r> Scanner<'r> {
    /// Returns a shared reference to the [`ScanContext`] stored in the WASM
    /// store.
    ///
    /// Only compiled when the `rules-profiling` feature is enabled.
    #[cfg(feature = "rules-profiling")]
    #[inline]
    fn scan_context<'a>(&self) -> &ScanContext<'r, 'a> {
        // The store is declared as holding `ScanContext<'static, 'static>`;
        // this transmute only rewrites those two lifetime parameters.
        // NOTE(review): soundness presumably relies on the context never
        // outliving the rules borrowed for `'r` nor the data being scanned;
        // confirm against `ScanContext`.
        unsafe {
            transmute::<&ScanContext<'static, 'static>, &ScanContext<'r, '_>>(
                self.wasm_store.data(),
            )
        }
    }
    /// Returns a mutable reference to the [`ScanContext`] stored in the WASM
    /// store.
    #[inline]
    fn scan_context_mut<'a>(&mut self) -> &mut ScanContext<'r, 'a> {
        // The store is declared as holding `ScanContext<'static, 'static>`;
        // this transmute only rewrites those two lifetime parameters.
        // NOTE(review): soundness presumably relies on the context never
        // outliving the rules borrowed for `'r` nor the data being scanned;
        // confirm against `ScanContext`.
        unsafe {
            transmute::<
                &mut ScanContext<'static, 'static>,
                &mut ScanContext<'r, '_>,
            >(self.wasm_store.data_mut())
        }
    }
453
454 fn load_file<'a>(
455 &self,
456 path: &Path,
457 ) -> Result<ScannedData<'a>, ScanError> {
458 let mut file = fs::File::open(path).map_err(|err| {
459 ScanError::OpenError { path: path.to_path_buf(), err }
460 })?;
461
462 let size = file.metadata().map(|m| m.len()).unwrap_or(0);
463
464 let mut buffered_file;
465 let mapped_file;
466
467 // For files smaller than ~500MB reading the whole file is faster than
468 // using a memory-mapped file.
469 let data = if self.use_mmap && size > 500_000_000 {
470 mapped_file = unsafe {
471 MmapOptions::new().map_copy_read_only(&file).map_err(|err| {
472 ScanError::MapError { path: path.to_path_buf(), err }
473 })
474 }?;
475 ScannedData::Mmap(mapped_file)
476 } else {
477 buffered_file = Vec::with_capacity(size as usize);
478 file.read_to_end(&mut buffered_file).map_err(|err| {
479 ScanError::OpenError { path: path.to_path_buf(), err }
480 })?;
481 ScannedData::Vec(buffered_file)
482 };
483
484 Ok(data)
485 }
486
    /// Common implementation behind all the public scan functions.
    ///
    /// Resets the scan context, stores `data` in it, computes (or takes
    /// from the user-provided outputs) the output of every module imported
    /// by the rules, evaluates the rule conditions, and finally moves the
    /// data into the `Finished` state so that the returned [`ScanResults`]
    /// can access it.
    ///
    /// # Errors
    ///
    /// * [`ScanError::ModuleError`] if the main function of some module
    ///   fails.
    /// * Any error returned while evaluating the conditions.
    ///
    /// # Panics
    ///
    /// Panics if the rules import a module that is not among the built-in
    /// modules.
    fn scan_impl<'a, 'opts>(
        &'a mut self,
        data: ScannedData<'a>,
        options: Option<ScanOptions<'opts>>,
    ) -> Result<ScanResults<'a, 'r>, ScanError> {
        let ctx = self.scan_context_mut();

        // Clear information about matches found in a previous scan, if any.
        ctx.reset();

        // Set the global variable `filesize` to the size of the scanned data.
        ctx.set_filesize(data.as_ref().len() as i64);

        // Indicate that the scanner is currently scanning the given data.
        // NOTE(review): on the early returns below (`?`) the context is
        // left in this state, still holding `data` — confirm that this is
        // harmless until the next `reset`.
        ctx.scan_state = ScanState::ScanningData(data);

        for module_name in ctx.compiled_rules.imports() {
            // Lookup the module in the list of built-in modules.
            let module = modules::BUILTIN_MODULES
                .get(module_name)
                .unwrap_or_else(|| panic!("module `{module_name}` not found"));

            let root_struct_name = module.root_struct_descriptor.full_name();

            let module_output;
            // If the user already provided some output for the module by
            // calling `Scanner::set_module_output`, use that output. If not,
            // call the module's main function (if the module has a main
            // function) for getting its output.
            if let Some(output) =
                ctx.user_provided_module_outputs.remove(root_struct_name)
            {
                module_output = Some(output);
            } else {
                // Metadata passed through `ScanOptions` for this module, if
                // any.
                let meta: Option<&'opts [u8]> =
                    options.as_ref().and_then(|options| {
                        options.module_metadata.get(module_name).copied()
                    });

                if let Some(main_fn) = module.main_fn {
                    module_output = Some(
                        main_fn(ctx.scanned_data().unwrap(), meta).map_err(
                            |err| ScanError::ModuleError {
                                module: module_name.to_string(),
                                err,
                            },
                        )?,
                    );
                } else {
                    module_output = None;
                }
            }

            if let Some(module_output) = &module_output {
                // Make sure that the module is returning a protobuf message of
                // the expected type.
                debug_assert_eq!(
                    module_output.descriptor_dyn().full_name(),
                    module.root_struct_descriptor.full_name(),
                    "main function of module `{}` must return `{}`, but returned `{}`",
                    module_name,
                    module.root_struct_descriptor.full_name(),
                    module_output.descriptor_dyn().full_name(),
                );

                // Make sure that the module is returning a protobuf message
                // where all required fields are initialized. This only applies
                // to proto2, proto3 doesn't have "required" fields, all fields
                // are optional.
                debug_assert!(
                    module_output.is_initialized_dyn(),
                    "module `{}` returned a protobuf `{}` where some required fields are not initialized ",
                    module_name,
                    module.root_struct_descriptor.full_name()
                );
            }

            // When constant folding is enabled we don't need to generate
            // structure fields for enums. This is because during the
            // optimization process symbols like MyEnum.ENUM_ITEM are resolved
            // to their constant values at compile time. In other words, the
            // compiler determines that MyEnum.ENUM_ITEM is equal to some value
            // X, and uses that value in the generated code.
            //
            // However, without constant folding, enums are treated as any
            // other field in a struct, and their values are determined at scan
            // time. For that reason these fields must be generated for enums
            // when constant folding is disabled.
            let generate_fields_for_enums =
                !cfg!(feature = "constant-folding");

            let module_struct = Struct::from_proto_descriptor_and_msg(
                &module.root_struct_descriptor,
                module_output.as_deref(),
                generate_fields_for_enums,
            );

            // Keep the module's output so that it can be retrieved later
            // through `ScanResults`.
            if let Some(module_output) = module_output {
                ctx.module_outputs
                    .insert(root_struct_name.to_string(), module_output);
            }

            // The data structure obtained from the module is added to the
            // root structure. Any data from previous scans will be replaced
            // with the new data structure.
            ctx.root_struct
                .add_field(module_name, TypeValue::Struct(module_struct));
        }

        // The user provided module outputs are not needed anymore. Let's
        // clear any remaining entry in the hash map (which can happen if
        // the user has set outputs for modules that are not even imported
        // by the rules).
        ctx.user_provided_module_outputs.clear();

        // Evaluate the conditions of every rule, this will call
        // `ScanContext::search_for_patterns` if necessary.
        ctx.eval_conditions()?;

        // Take the scanned data back out of the context. The state was set
        // to `ScanningData` above, so any other state here is a bug.
        let data = match ctx.scan_state.take() {
            ScanState::ScanningData(data) => data,
            _ => unreachable!(),
        };

        // Hand the data over to the `Finished` state, so that matching data
        // remains accessible while the results are processed.
        ctx.scan_state = ScanState::Finished(DataSnippets::SingleBlock(data));

        Ok(ScanResults::new(ctx))
    }
615}
616
617/// Helper type that exposes the data matched during a scan operation.
618///
619/// Matching data can be accessed through the [`Match::data`] method. Normally,
620/// this data can be retrieved by slicing directly into the scanned input.
621/// However, that requires the original input to remain valid until the scan
622/// results are processed. This works fine for a single contiguous block of
623/// memory, but is impractical when scanning multiple blocks, since holding
624/// onto all of them until the end would consume excessive memory.
625///
626/// To handle this, two strategies are used:
627///
628/// - **Single-block scans**: Data is accessed directly from the input slice.
629/// - **Multi-block scans**: Matching fragments are copied and retained in a
630/// BTreeMap until the results are processed. The keys in the btree are
631/// the offsets where the snippets start and the values are vectors with
632/// the snippet's data.
633///
634/// Each strategy corresponds to a variant in this enum.
635pub(crate) enum DataSnippets<'d> {
636 SingleBlock(ScannedData<'d>),
637 MultiBlock(BTreeMap<usize, Vec<u8>>),
638}
639
640impl DataSnippets<'_> {
641 pub(crate) fn get(&self, range: Range<usize>) -> Option<&[u8]> {
642 match self {
643 Self::SingleBlock(data) => data.as_ref().get(range),
644 Self::MultiBlock(btree) => {
645 // Find in the btree the snippet that starts exactly at the
646 // offset indicated by range.start, if not found, take the
647 // previous one, which may also contain the requested range.
648 let (snippet_offset, snippet_data) =
649 btree.range(..=range.start).next_back()?;
650
651 // Calculate the start and end of the slice within the snippet.
652 let start = range.start - snippet_offset;
653 let end = range.end - snippet_offset;
654
655 // Returns the data, or `None` if `start` and `end` are not
656 // within the snippet boundaries.
657 snippet_data.get(start..end)
658 }
659 }
660 }
661}
662
/// Results of a scan operation.
///
/// Allows iterating over both the matching and non-matching rules, and
/// gives access to the outputs produced by YARA modules.
pub struct ScanResults<'a, 'r> {
    /// Scan context that holds the state left by the scan.
    ctx: &'a ScanContext<'r, 'a>,
}
669
670impl Debug for ScanResults<'_, '_> {
671 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
672 f.write_str("ScanResults")
673 }
674}
675
676impl<'a, 'r> ScanResults<'a, 'r> {
677 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
678 Self { ctx }
679 }
680
681 /// Returns an iterator that yields the matching rules in arbitrary order.
682 pub fn matching_rules(&self) -> MatchingRules<'_, 'r> {
683 MatchingRules::new(self.ctx)
684 }
685
686 /// Returns an iterator that yields the non-matching rules in arbitrary
687 /// order.
688 pub fn non_matching_rules(&self) -> NonMatchingRules<'_, 'r> {
689 NonMatchingRules::new(self.ctx)
690 }
691
692 /// Returns the protobuf produced by a YARA module after processing the
693 /// data.
694 ///
695 /// The result will be `None` if the module doesn't exist or didn't
696 /// produce any output.
697 pub fn module_output(
698 &self,
699 module_name: &str,
700 ) -> Option<&'a dyn MessageDyn> {
701 let module = BUILTIN_MODULES.get(module_name)?;
702 let module_output = self
703 .ctx
704 .module_outputs
705 .get(module.root_struct_descriptor.full_name())?
706 .as_ref();
707 Some(module_output)
708 }
709
710 /// Returns an iterator that yields tuples composed of a YARA module name
711 /// and the protobuf produced by that module.
712 ///
713 /// Only returns the modules that produced some output.
714 pub fn module_outputs(&self) -> ModuleOutputs<'a, 'r> {
715 ModuleOutputs::new(self.ctx)
716 }
717}
718
/// Iterator that yields the rules that matched during a scan.
///
/// Private rules are not included by default, use
/// [`MatchingRules::include_private`] for changing this behaviour.
pub struct MatchingRules<'a, 'r> {
    /// Scan context the results are read from.
    ctx: &'a ScanContext<'r, 'a>,
    /// Iterator over the IDs of the matching rules.
    iterator: Iter<'a, RuleId>,
    /// Number of non-private matching rules not yet visited.
    len_non_private: usize,
    /// Number of private matching rules not yet visited.
    len_private: usize,
    /// Whether private rules are yielded.
    include_private: bool,
}
730
731impl<'a, 'r> MatchingRules<'a, 'r> {
732 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
733 Self {
734 ctx,
735 iterator: ctx.matching_rules.iter(),
736 include_private: false,
737 len_non_private: ctx.matching_rules.len()
738 - ctx.num_matching_private_rules,
739 len_private: ctx.num_matching_private_rules,
740 }
741 }
742
743 /// Specifies whether the iterator should yield private rules.
744 ///
745 /// This does not reset the iterator to its initial state, the iterator will
746 /// continue from its current position.
747 pub fn include_private(mut self, yes: bool) -> Self {
748 self.include_private = yes;
749 self
750 }
751}
752
753impl<'a, 'r> Iterator for MatchingRules<'a, 'r> {
754 type Item = Rule<'a, 'r>;
755
756 fn next(&mut self) -> Option<Self::Item> {
757 let rules = self.ctx.compiled_rules;
758 loop {
759 let rule_id = *self.iterator.next()?;
760 let rule_info = rules.get(rule_id);
761 if rule_info.is_private {
762 self.len_private -= 1;
763 } else {
764 self.len_non_private -= 1;
765 }
766 if self.include_private || !rule_info.is_private {
767 return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
768 }
769 }
770 }
771}
772
773impl ExactSizeIterator for MatchingRules<'_, '_> {
774 #[inline]
775 fn len(&self) -> usize {
776 if self.include_private {
777 self.len_non_private + self.len_private
778 } else {
779 self.len_non_private
780 }
781 }
782}
783
/// Iterator that yields the rules that didn't match during a scan.
///
/// Private rules are not included by default, use
/// [`NonMatchingRules::include_private`] for changing this behaviour.
pub struct NonMatchingRules<'a, 'r> {
    /// Scan context the results are read from.
    ctx: &'a ScanContext<'r, 'a>,
    /// Iterator over the positions of the zero bits in the bitmap of
    /// matching rules; each position is converted to a rule ID.
    iterator: bitvec::slice::IterZeros<'a, u8, Lsb0>,
    /// Whether private rules are yielded.
    include_private: bool,
    /// Number of private non-matching rules not yet visited.
    len_private: usize,
    /// Number of non-private non-matching rules not yet visited.
    len_non_private: usize,
}
795
796impl<'a, 'r> NonMatchingRules<'a, 'r> {
797 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
798 let num_rules = ctx.compiled_rules.num_rules();
799 let main_memory = ctx
800 .wasm_main_memory
801 .unwrap()
802 .data(unsafe { ctx.wasm_store.as_ref() });
803
804 let base = MATCHING_RULES_BITMAP_BASE as usize;
805
806 // Create a BitSlice that covers the region of main memory containing
807 // the bitmap that tells which rules matched and which did not.
808 let matching_rules_bitmap = BitSlice::<_, Lsb0>::from_slice(
809 &main_memory[base..base + num_rules / 8 + 1],
810 );
811
812 // The BitSlice will cover more bits than necessary, for example, if
813 // there are 3 rules the BitSlice will have 8 bits because it is
814 // created from a u8 slice that has 1 byte. Here we make sure that
815 // the BitSlice has exactly as many bits as existing rules.
816 let matching_rules_bitmap = &matching_rules_bitmap[0..num_rules];
817
818 Self {
819 ctx,
820 iterator: matching_rules_bitmap.iter_zeros(),
821 include_private: false,
822 len_non_private: ctx.compiled_rules.num_rules()
823 - ctx.matching_rules.len()
824 - ctx.num_non_matching_private_rules,
825 len_private: ctx.num_non_matching_private_rules,
826 }
827 }
828
829 /// Specifies whether the iterator should yield private rules.
830 ///
831 /// This does not reset the iterator to its initial state, the iterator will
832 /// continue from its current position.
833 pub fn include_private(mut self, yes: bool) -> Self {
834 self.include_private = yes;
835 self
836 }
837}
838
839impl<'a, 'r> Iterator for NonMatchingRules<'a, 'r> {
840 type Item = Rule<'a, 'r>;
841
842 fn next(&mut self) -> Option<Self::Item> {
843 let rules = self.ctx.compiled_rules;
844
845 loop {
846 let rule_id = RuleId::from(self.iterator.next()?);
847 let rule_info = rules.get(rule_id);
848
849 if rule_info.is_private {
850 self.len_private -= 1;
851 } else {
852 self.len_non_private -= 1;
853 }
854
855 if self.include_private || !rule_info.is_private {
856 return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
857 }
858 }
859 }
860}
861
862impl ExactSizeIterator for NonMatchingRules<'_, '_> {
863 #[inline]
864 fn len(&self) -> usize {
865 if self.include_private {
866 self.len_non_private + self.len_private
867 } else {
868 self.len_non_private
869 }
870 }
871}
872
/// Iterator that returns the outputs produced by YARA modules.
///
/// Each item is a tuple with the module name and the protobuf message
/// produced by that module.
pub struct ModuleOutputs<'a, 'r> {
    /// Scan context the module outputs are read from.
    ctx: &'a ScanContext<'r, 'a>,
    /// Length reported by the iterator; initialized to the number of
    /// entries in `ctx.module_outputs`.
    len: usize,
    /// Iterator over all the built-in modules.
    iterator: hash_map::Iter<'a, &'a str, Module>,
}
879
880impl<'a, 'r> ModuleOutputs<'a, 'r> {
881 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
882 Self {
883 ctx,
884 len: ctx.module_outputs.len(),
885 iterator: BUILTIN_MODULES.iter(),
886 }
887 }
888}
889
impl ExactSizeIterator for ModuleOutputs<'_, '_> {
    /// Returns the length tracked in the `len` field.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }
}
896
897impl<'a> Iterator for ModuleOutputs<'a, '_> {
898 type Item = (&'a str, &'a dyn MessageDyn);
899
900 fn next(&mut self) -> Option<Self::Item> {
901 loop {
902 let (name, module) = self.iterator.next()?;
903 if let Some(module_output) = self
904 .ctx
905 .module_outputs
906 .get(module.root_struct_descriptor.full_name())
907 {
908 return Some((*name, module_output.as_ref()));
909 }
910 }
911 }
912}
913
/// Unit tests for [`DataSnippets`] lookups in multi-block mode.
#[cfg(test)]
mod snippet_tests {
    use super::DataSnippets;
    use std::collections::BTreeMap;

    /// Checks that ranges are resolved against the snippet that contains
    /// them, and that ranges exceeding a snippet yield `None`.
    #[test]
    fn snippets() {
        let mut btree_map = BTreeMap::new();

        // Two snippets: offsets 0..9 and 50..54.
        btree_map.insert(0, vec![1, 2, 3, 4, 5, 6, 7, 8, 9]);
        btree_map.insert(50, vec![51, 52, 53, 54]);

        let snippets = DataSnippets::MultiBlock(btree_map);

        // Ranges inside the first snippet.
        assert_eq!(snippets.get(0..2), Some([1, 2].as_slice()));
        assert_eq!(snippets.get(1..3), Some([2, 3].as_slice()));
        assert_eq!(snippets.get(8..9), Some([9].as_slice()));
        // One byte past the end of the first snippet.
        assert_eq!(snippets.get(9..10), None);
        // Ranges inside the second snippet.
        assert_eq!(snippets.get(50..51), Some([51].as_slice()));
        assert_eq!(snippets.get(50..54), Some([51, 52, 53, 54].as_slice()));
        assert_eq!(snippets.get(52..54), Some([53, 54].as_slice()));
        // Range that extends past the end of the second snippet.
        assert_eq!(snippets.get(50..56), None);
    }
}
937}