yara_x/scanner/mod.rs
/*! This module implements the YARA scanner.

The scanner takes the rules produced by the compiler and scans data with them.
*/
5use std::collections::{BTreeMap, HashMap, hash_map};
6use std::fmt::{Debug, Formatter};
7use std::fs;
8use std::io::Read;
9use std::mem::transmute;
10use std::ops::Range;
11use std::path::{Path, PathBuf};
12use std::pin::Pin;
13use std::slice::Iter;
14use std::sync::Once;
15use std::sync::atomic::AtomicU64;
16use std::time::Duration;
17
18use bitvec::prelude::*;
19use memmap2::{Mmap, MmapOptions};
20use protobuf::{CodedInputStream, MessageDyn};
21use thiserror::Error;
22
23use crate::compiler::{RuleId, Rules};
24use crate::models::Rule;
25use crate::modules::{BUILTIN_MODULES, Module, ModuleError};
26use crate::scanner::context::create_wasm_store_and_ctx;
27use crate::types::{Struct, TypeValue};
28use crate::variables::VariableError;
29use crate::wasm::MATCHING_RULES_BITMAP_BASE;
30use crate::wasm::runtime::Store;
31use crate::{Variable, modules};
32
33pub(crate) use crate::scanner::context::RuntimeObject;
34pub(crate) use crate::scanner::context::RuntimeObjectHandle;
35pub(crate) use crate::scanner::context::ScanContext;
36pub(crate) use crate::scanner::context::ScanState;
37pub(crate) use crate::scanner::matches::Match;
38
39mod context;
40mod matches;
41
42pub mod blocks;
43
44#[cfg(test)]
45mod tests;
46
/// Error returned when a scan operation fails.
///
/// The enum is `#[non_exhaustive]`, so callers matching on it need a
/// catch-all arm.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ScanError {
    /// The scan was aborted after the timeout period.
    ///
    /// The timeout is configured with [`Scanner::set_timeout`].
    #[error("timeout")]
    Timeout,
    /// Could not open the scanned file.
    ///
    /// This variant is also used when the file was opened successfully but
    /// reading its content into memory failed.
    #[error("can not open `{path}`: {err}")]
    OpenError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Error that occurred.
        err: std::io::Error,
    },
    /// Could not map the scanned file into memory.
    #[error("can not map `{path}`: {err}")]
    MapError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Error that occurred.
        err: std::io::Error,
    },
    /// Could not deserialize the protobuf message for some YARA module.
    ///
    /// Produced by [`Scanner::set_module_output_raw`] when the raw data can
    /// not be parsed as the module's protobuf message.
    #[error(
        "can not deserialize protobuf message for YARA module `{module}`: {err}"
    )]
    ProtoError {
        /// Module name.
        module: String,
        /// Error that occurred.
        err: protobuf::Error,
    },
    /// The module is unknown.
    ///
    /// Produced by [`Scanner::set_module_output`] and
    /// [`Scanner::set_module_output_raw`] when the given message or name
    /// doesn't correspond to any built-in module.
    #[error("unknown module `{module}`")]
    UnknownModule {
        /// Module name.
        module: String,
    },
    /// Some module produced an error when it was invoked.
    ///
    /// Produced during a scan when the main function of an imported module
    /// returns an error.
    #[error("error in module `{module}`: {err}")]
    ModuleError {
        /// Module name.
        module: String,
        /// Error that occurred.
        err: ModuleError,
    },
}
95
/// Global counter that gets incremented every 1 second by a dedicated thread.
///
/// This counter is used for determining when a scan operation has timed out.
/// It starts at zero.
static HEARTBEAT_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Used for spawning the thread that increments `HEARTBEAT_COUNTER`.
///
/// Being a [`Once`], the initialization it guards runs at most one time per
/// process.
static INIT_HEARTBEAT: Once = Once::new();
103
/// Represents the data being scanned.
///
/// The scanned data can be backed by a slice owned by someone else, or a
/// vector or memory-mapped file owned by `ScannedData` itself.
///
/// Regardless of the variant, the bytes are accessible through
/// [`AsRef<[u8]>`](AsRef).
pub enum ScannedData<'d> {
    /// Data borrowed from the caller for the lifetime `'d`.
    Slice(&'d [u8]),
    /// Data owned by this value, for example the content of a file that was
    /// read into memory.
    Vec(Vec<u8>),
    /// A memory-mapped file owned by this value.
    Mmap(Mmap),
}
113
114impl AsRef<[u8]> for ScannedData<'_> {
115 fn as_ref(&self) -> &[u8] {
116 match self {
117 ScannedData::Slice(s) => s,
118 ScannedData::Vec(v) => v.as_ref(),
119 ScannedData::Mmap(m) => m.as_ref(),
120 }
121 }
122}
123
124impl<'d> TryInto<ScannedData<'d>> for &'d [u8] {
125 type Error = ScanError;
126 fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
127 Ok(ScannedData::Slice(self))
128 }
129}
130
131impl<'d, const N: usize> TryInto<ScannedData<'d>> for &'d [u8; N] {
132 type Error = ScanError;
133 fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
134 Ok(ScannedData::Slice(self))
135 }
136}
137
/// Contains information about the time spent on a rule.
///
/// Values of this type are returned by [`Scanner::slowest_rules`]. The type
/// only exists when the `rules-profiling` feature is enabled.
#[cfg(feature = "rules-profiling")]
pub struct ProfilingData<'r> {
    /// Rule namespace.
    pub namespace: &'r str,
    /// Rule name.
    pub rule: &'r str,
    /// Time spent executing the rule's condition.
    pub condition_exec_time: Duration,
    /// Time spent matching the rule's patterns.
    pub pattern_matching_time: Duration,
}
150
/// Optional information for the scan operation.
///
/// Options are passed to [`Scanner::scan_with_options`] and
/// [`Scanner::scan_file_with_options`].
#[derive(Debug, Default)]
pub struct ScanOptions<'a> {
    /// Metadata for YARA modules, keyed by module name.
    module_metadata: HashMap<&'a str, &'a [u8]>,
}

impl<'a> ScanOptions<'a> {
    /// Creates a new instance of `ScanOptions` with no additional information
    /// for the scan operation.
    ///
    /// Use other methods to add additional information.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds metadata for a YARA module.
    ///
    /// Setting metadata twice for the same module replaces the previous
    /// value. The options are taken and returned by value, so calls can be
    /// chained.
    pub fn set_module_metadata(
        self,
        module_name: &'a str,
        metadata: &'a [u8],
    ) -> Self {
        let mut module_metadata = self.module_metadata;
        module_metadata.insert(module_name, metadata);
        Self { module_metadata }
    }
}
176
/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled [`Rules`] and scans data with those
/// rules. The same scanner can be used for scanning multiple files or
/// in-memory data sequentially, but you need multiple scanners for scanning in
/// parallel.
pub struct Scanner<'r> {
    /// The rules this scanner was created with.
    // NOTE(review): the field is not read anywhere in this file; presumably
    // it only ties the scanner to the `'r` lifetime of the rules — confirm.
    _rules: &'r Rules,
    /// WASM store that owns the [`ScanContext`].
    ///
    /// The context is stored with `'static` lifetimes and re-borrowed with
    /// the scanner's lifetimes by `scan_context` / `scan_context_mut`.
    wasm_store: Pin<Box<Store<ScanContext<'static, 'static>>>>,
    /// Whether file scans are allowed to use memory-mapped files. See
    /// [`Scanner::use_mmap`].
    use_mmap: bool,
}
188
189impl<'r> Scanner<'r> {
190 /// Creates a new scanner.
191 pub fn new(rules: &'r Rules) -> Self {
192 let wasm_store = create_wasm_store_and_ctx(rules);
193 Self { _rules: rules, wasm_store, use_mmap: true }
194 }
195
196 /// Sets a timeout for scan operations.
197 ///
198 /// The scan functions will return an [ScanError::Timeout] once the
199 /// provided timeout duration has elapsed. The scanner will make every
200 /// effort to stop promptly after the designated timeout duration. However,
201 /// in some cases, particularly with rules containing only a few patterns,
202 /// the scanner could potentially continue running for a longer period than
203 /// the specified timeout.
204 pub fn set_timeout(&mut self, timeout: Duration) -> &mut Self {
205 self.scan_context_mut().set_timeout(timeout);
206 self
207 }
208
209 /// Sets the maximum number of matches per pattern.
210 ///
211 /// When some pattern reaches the maximum number of patterns it won't
212 /// produce more matches.
213 pub fn max_matches_per_pattern(&mut self, n: usize) -> &mut Self {
214 self.scan_context_mut().pattern_matches.max_matches_per_pattern(n);
215 self
216 }
217
218 /// Specifies whether [`Scanner::scan_file`] and [`Scanner::scan_file_with_options`]
219 /// may use memory-mapped files to read input.
220 ///
221 /// By default, the scanner uses memory mapping for very large files, as this
222 /// is typically faster than copying file contents into memory. However, this
223 /// approach has a drawback: if another process truncates the file during
224 /// scanning, a `SIGBUS` signal may occur.
225 ///
226 /// Setting this option disables memory mapping and forces the scanner to
227 /// always read files into an in-memory buffer instead. This method is slower,
228 /// but safer.
229 pub fn use_mmap(&mut self, yes: bool) -> &mut Self {
230 self.use_mmap = yes;
231 self
232 }
233
234 /// Sets a callback that is invoked every time a YARA rule calls the
235 /// `console` module.
236 ///
237 /// The `callback` function is invoked with a string representing the
238 /// message being logged. The function can print the message to stdout,
239 /// append it to a file, etc. If no callback is set these messages are
240 /// ignored.
241 pub fn console_log<F>(&mut self, callback: F) -> &mut Self
242 where
243 F: FnMut(String) + 'r,
244 {
245 self.scan_context_mut().console_log = Some(Box::new(callback));
246 self
247 }
248
249 /// Scans in-memory data.
250 pub fn scan<'a>(
251 &'a mut self,
252 data: &'a [u8],
253 ) -> Result<ScanResults<'a, 'r>, ScanError> {
254 self.scan_impl(data.try_into()?, None)
255 }
256
257 /// Scans a file.
258 pub fn scan_file<'a, P>(
259 &'a mut self,
260 target: P,
261 ) -> Result<ScanResults<'a, 'r>, ScanError>
262 where
263 P: AsRef<Path>,
264 {
265 self.scan_impl(self.load_file(target.as_ref())?, None)
266 }
267
268 /// Like [`Scanner::scan`], but allows to specify additional scan options.
269 pub fn scan_with_options<'a, 'opts>(
270 &'a mut self,
271 data: &'a [u8],
272 options: ScanOptions<'opts>,
273 ) -> Result<ScanResults<'a, 'r>, ScanError> {
274 self.scan_impl(ScannedData::Slice(data), Some(options))
275 }
276
277 /// Like [`Scanner::scan_file`], but allows to specify additional scan
278 /// options.
279 pub fn scan_file_with_options<'opts, P>(
280 &mut self,
281 target: P,
282 options: ScanOptions<'opts>,
283 ) -> Result<ScanResults<'_, 'r>, ScanError>
284 where
285 P: AsRef<Path>,
286 {
287 self.scan_impl(self.load_file(target.as_ref())?, Some(options))
288 }
289
290 /// Sets the value of a global variable.
291 ///
292 /// The variable must has been previously defined by calling
293 /// [`crate::Compiler::define_global`], and the type it has during the
294 /// definition must match the type of the new value (`T`).
295 ///
296 /// The variable will retain the new value in subsequent scans, unless this
297 /// function is called again for setting a new value.
298 pub fn set_global<T: TryInto<Variable>>(
299 &mut self,
300 ident: &str,
301 value: T,
302 ) -> Result<&mut Self, VariableError>
303 where
304 VariableError: From<<T as TryInto<Variable>>::Error>,
305 {
306 self.scan_context_mut().set_global(ident, value)?;
307 Ok(self)
308 }
309
310 /// Sets the output data for a YARA module.
311 ///
312 /// Each YARA module generates an output consisting of a data structure that
313 /// contains information about the scanned file. This data structure is
314 /// represented by a Protocol Buffer message. Typically, you won't need to
315 /// provide this data yourself, as the YARA module automatically generates
316 /// different outputs for each file it scans.
317 ///
318 /// However, there are two scenarios in which you may want to provide the
319 /// output for a module yourself:
320 ///
321 /// 1) When the module does not produce any output on its own.
322 /// 2) When you already know the output of the module for the upcoming file
323 /// to be scanned, and you prefer to reuse this data instead of generating
324 /// it again.
325 ///
326 /// Case 1) applies to certain modules lacking a main function, thus
327 /// incapable of producing any output on their own. For such modules, you
328 /// must set the output before scanning the associated data. Since the
329 /// module's output typically varies with each scanned file, you need to
330 /// call [`Scanner::set_module_output`] prior to each invocation of
331 /// [`Scanner::scan`]. Once [`Scanner::scan`] is executed, the module's
332 /// output is consumed and will be empty unless set again before the
333 /// subsequent call.
334 ///
335 /// Case 2) applies when you have previously stored the module's output for
336 /// certain scanned data. In such cases, when rescanning the data, you can
337 /// utilize this function to supply the module's output, thereby preventing
338 /// redundant computation by the module. This optimization enhances
339 /// performance by eliminating the need for the module to reparse the
340 /// scanned data.
341 ///
342 /// <br>
343 ///
344 /// The `data` argument must be a Protocol Buffer message corresponding
345 /// to any of the existing YARA modules.
346 pub fn set_module_output(
347 &mut self,
348 data: Box<dyn MessageDyn>,
349 ) -> Result<&mut Self, ScanError> {
350 let descriptor = data.descriptor_dyn();
351 let full_name = descriptor.full_name();
352
353 // Check if the protobuf message passed to this function corresponds
354 // with any of the existing modules.
355 if !BUILTIN_MODULES
356 .iter()
357 .any(|m| m.1.root_struct_descriptor.full_name() == full_name)
358 {
359 return Err(ScanError::UnknownModule {
360 module: full_name.to_string(),
361 });
362 }
363
364 self.scan_context_mut()
365 .user_provided_module_outputs
366 .insert(full_name.to_string(), data);
367
368 Ok(self)
369 }
370
371 /// Similar to [`Scanner::set_module_output`], but receives a module name
372 /// and the protobuf message as raw data.
373 ///
374 /// `name` can be either the YARA module name (i.e: "pe", "elf", "dotnet",
375 /// etc.) or the fully-qualified name for the protobuf message associated
376 /// to the module (i.e: "pe.PE", "elf.ELF", "dotnet.Dotnet", etc.).
377 pub fn set_module_output_raw(
378 &mut self,
379 name: &str,
380 data: &[u8],
381 ) -> Result<&mut Self, ScanError> {
382 // Try to find the module by name first, if not found, then try
383 // to find a module where the fully-qualified name for its protobuf
384 // message matches the `name` arguments.
385 let descriptor = if let Some(module) = BUILTIN_MODULES.get(name) {
386 Some(&module.root_struct_descriptor)
387 } else {
388 BUILTIN_MODULES.values().find_map(|module| {
389 if module.root_struct_descriptor.full_name() == name {
390 Some(&module.root_struct_descriptor)
391 } else {
392 None
393 }
394 })
395 };
396
397 if descriptor.is_none() {
398 return Err(ScanError::UnknownModule { module: name.to_string() });
399 }
400
401 let mut is = CodedInputStream::from_bytes(data);
402
403 // Default recursion limit is 100, that's not enough for some deeply
404 // nested structures like the process tree in the `vt` module.
405 is.set_recursion_limit(500);
406
407 self.set_module_output(
408 descriptor.unwrap().parse_from(&mut is).map_err(|err| {
409 ScanError::ProtoError { module: name.to_string(), err }
410 })?,
411 )
412 }
413
414 /// Returns profiling data for the slowest N rules.
415 ///
416 /// The profiling data reflects the cumulative execution time of each rule
417 /// across all scanned files. This information is useful for identifying
418 /// performance bottlenecks. To reset the profiling data and start fresh
419 /// for subsequent scans, use [`Scanner::clear_profiling_data`].
420 #[cfg(feature = "rules-profiling")]
421 pub fn slowest_rules(&self, n: usize) -> Vec<ProfilingData<'_>> {
422 self.scan_context().slowest_rules(n)
423 }
424
425 /// Clears all accumulated profiling data.
426 ///
427 /// This method resets the profiling data collected during rule execution
428 /// across scanned files. Use this to start a new profiling session, ensuring
429 /// the results reflect only the data gathered after this method is called.
430 #[cfg(feature = "rules-profiling")]
431 pub fn clear_profiling_data(&mut self) {
432 self.scan_context_mut().clear_profiling_data()
433 }
434}
435
436impl<'r> Scanner<'r> {
437 #[cfg(feature = "rules-profiling")]
438 #[inline]
439 fn scan_context<'a>(&self) -> &ScanContext<'r, 'a> {
440 unsafe {
441 transmute::<&ScanContext<'static, 'static>, &ScanContext<'r, '_>>(
442 self.wasm_store.data(),
443 )
444 }
445 }
446 #[inline]
447 fn scan_context_mut<'a>(&mut self) -> &mut ScanContext<'r, 'a> {
448 unsafe {
449 transmute::<
450 &mut ScanContext<'static, 'static>,
451 &mut ScanContext<'r, '_>,
452 >(self.wasm_store.data_mut())
453 }
454 }
455
456 fn load_file<'a>(
457 &self,
458 path: &Path,
459 ) -> Result<ScannedData<'a>, ScanError> {
460 let mut file = fs::File::open(path).map_err(|err| {
461 ScanError::OpenError { path: path.to_path_buf(), err }
462 })?;
463
464 let size = file.metadata().map(|m| m.len()).unwrap_or(0);
465
466 let mut buffered_file;
467 let mapped_file;
468
469 // For files smaller than ~500MB reading the whole file is faster than
470 // using a memory-mapped file.
471 let data = if self.use_mmap && size > 500_000_000 {
472 mapped_file = unsafe {
473 MmapOptions::new().map_copy_read_only(&file).map_err(|err| {
474 ScanError::MapError { path: path.to_path_buf(), err }
475 })
476 }?;
477 ScannedData::Mmap(mapped_file)
478 } else {
479 buffered_file = Vec::with_capacity(size as usize);
480 file.read_to_end(&mut buffered_file).map_err(|err| {
481 ScanError::OpenError { path: path.to_path_buf(), err }
482 })?;
483 ScannedData::Vec(buffered_file)
484 };
485
486 Ok(data)
487 }
488
489 fn scan_impl<'a, 'opts>(
490 &'a mut self,
491 data: ScannedData<'a>,
492 options: Option<ScanOptions<'opts>>,
493 ) -> Result<ScanResults<'a, 'r>, ScanError> {
494 let ctx = self.scan_context_mut();
495
496 // Clear information about matches found in a previous scan, if any.
497 ctx.reset();
498
499 // Set the global variable `filesize` to the size of the scanned data.
500 ctx.set_filesize(data.as_ref().len() as i64);
501
502 // Indicate that the scanner is currently scanning the given data.
503 ctx.scan_state = ScanState::ScanningData(data);
504
505 for module_name in ctx.compiled_rules.imports() {
506 // Lookup the module in the list of built-in modules.
507 let module = modules::BUILTIN_MODULES
508 .get(module_name)
509 .unwrap_or_else(|| panic!("module `{module_name}` not found"));
510
511 let root_struct_name = module.root_struct_descriptor.full_name();
512
513 let module_output;
514 // If the user already provided some output for the module by
515 // calling `Scanner::set_module_output`, use that output. If not,
516 // call the module's main function (if the module has a main
517 // function) for getting its output.
518 if let Some(output) =
519 ctx.user_provided_module_outputs.remove(root_struct_name)
520 {
521 module_output = Some(output);
522 } else {
523 let meta: Option<&'opts [u8]> =
524 options.as_ref().and_then(|options| {
525 options.module_metadata.get(module_name).copied()
526 });
527
528 if let Some(main_fn) = module.main_fn {
529 module_output = Some(
530 main_fn(ctx.scanned_data().unwrap(), meta).map_err(
531 |err| ScanError::ModuleError {
532 module: module_name.to_string(),
533 err,
534 },
535 )?,
536 );
537 } else {
538 module_output = None;
539 }
540 }
541
542 if let Some(module_output) = &module_output {
543 // Make sure that the module is returning a protobuf message of
544 // the expected type.
545 debug_assert_eq!(
546 module_output.descriptor_dyn().full_name(),
547 module.root_struct_descriptor.full_name(),
548 "main function of module `{}` must return `{}`, but returned `{}`",
549 module_name,
550 module.root_struct_descriptor.full_name(),
551 module_output.descriptor_dyn().full_name(),
552 );
553
554 // Make sure that the module is returning a protobuf message
555 // where all required fields are initialized. This only applies
556 // to proto2, proto3 doesn't have "required" fields, all fields
557 // are optional.
558 debug_assert!(
559 module_output.is_initialized_dyn(),
560 "module `{}` returned a protobuf `{}` where some required fields are not initialized ",
561 module_name,
562 module.root_struct_descriptor.full_name()
563 );
564 }
565
566 // When constant folding is enabled we don't need to generate
567 // structure fields for enums. This is because during the
568 // optimization process symbols like MyEnum.ENUM_ITEM are resolved
569 // to their constant values at compile time. In other words, the
570 // compiler determines that MyEnum.ENUM_ITEM is equal to some value
571 // X, and uses that value in the generated code.
572 //
573 // However, without constant folding, enums are treated as any
574 // other field in a struct, and their values are determined at scan
575 // time. For that reason these fields must be generated for enums
576 // when constant folding is disabled.
577 let generate_fields_for_enums =
578 !cfg!(feature = "constant-folding");
579
580 let module_struct = Struct::from_proto_descriptor_and_msg(
581 &module.root_struct_descriptor,
582 module_output.as_deref(),
583 generate_fields_for_enums,
584 );
585
586 if let Some(module_output) = module_output {
587 ctx.module_outputs
588 .insert(root_struct_name.to_string(), module_output);
589 }
590
591 // The data structure obtained from the module is added to the
592 // root structure. Any data from previous scans will be replaced
593 // with the new data structure.
594 ctx.root_struct
595 .add_field(module_name, TypeValue::Struct(module_struct));
596 }
597
598 // The user provided module outputs are not needed anymore. Let's
599 // clear any remaining entry in the hash map (which can happen if
600 // the user has set outputs for modules that are not even imported
601 // by the rules.
602 ctx.user_provided_module_outputs.clear();
603
604 // Clear the flag that indicates that the search phase was done.
605 ctx.set_pattern_search_done(false);
606
607 // Evaluate the conditions of every rule, this will call
608 // `ScanContext::search_for_patterns` if necessary.
609 ctx.eval_conditions()?;
610
611 let data = match ctx.scan_state.take() {
612 ScanState::ScanningData(data) => data,
613 _ => unreachable!(),
614 };
615
616 ctx.scan_state = ScanState::Finished(DataSnippets::SingleBlock(data));
617
618 Ok(ScanResults::new(ctx))
619 }
620}
621
/// Helper type that exposes the data matched during a scan operation.
///
/// Matching data can be accessed through the [`Match::data`] method. Normally,
/// this data can be retrieved by slicing directly into the scanned input.
/// However, that requires the original input to remain valid until the scan
/// results are processed. This works fine for a single contiguous block of
/// memory, but is impractical when scanning multiple blocks, since holding
/// onto all of them until the end would consume excessive memory.
///
/// To handle this, two strategies are used:
///
/// - **Single-block scans**: Data is accessed directly from the input slice.
/// - **Multi-block scans**: Matching fragments are copied and retained in a
///   BTreeMap until the results are processed. The keys in the btree are
///   the offsets where the snippets start and the values are vectors with
///   the snippet's data.
///
/// Each strategy corresponds to a variant in this enum.
pub(crate) enum DataSnippets<'d> {
    /// The whole scanned input, kept as a single block.
    SingleBlock(ScannedData<'d>),
    /// Copies of the matching fragments, keyed by the offset where each
    /// fragment starts.
    MultiBlock(BTreeMap<usize, Vec<u8>>),
}
644
645impl DataSnippets<'_> {
646 pub(crate) fn get(&self, range: Range<usize>) -> Option<&[u8]> {
647 match self {
648 Self::SingleBlock(data) => data.as_ref().get(range),
649 Self::MultiBlock(btree) => {
650 // Find in the btree the snippet that starts exactly at the
651 // offset indicated by range.start, if not found, take the
652 // previous one, which may also contain the requested range.
653 let (snippet_offset, snippet_data) =
654 btree.range(..=range.start).next_back()?;
655
656 // Calculate the start and end of the slice within the snippet.
657 let start = range.start - snippet_offset;
658 let end = range.end - snippet_offset;
659
660 // Returns the data, or `None` if `start` and `end` are not
661 // within the snippet boundaries.
662 snippet_data.get(start..end)
663 }
664 }
665 }
666}
667
/// Results of a scan operation.
///
/// Allows iterating over both the matching and non-matching rules, and
/// retrieving the outputs produced by YARA modules.
pub struct ScanResults<'a, 'r> {
    /// Scan context the results are read from.
    ctx: &'a ScanContext<'r, 'a>,
}
674
675impl Debug for ScanResults<'_, '_> {
676 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
677 f.write_str("ScanResults")
678 }
679}
680
681impl<'a, 'r> ScanResults<'a, 'r> {
682 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
683 Self { ctx }
684 }
685
686 /// Returns an iterator that yields the matching rules in arbitrary order.
687 pub fn matching_rules(&self) -> MatchingRules<'_, 'r> {
688 MatchingRules::new(self.ctx)
689 }
690
691 /// Returns an iterator that yields the non-matching rules in arbitrary
692 /// order.
693 pub fn non_matching_rules(&self) -> NonMatchingRules<'_, 'r> {
694 NonMatchingRules::new(self.ctx)
695 }
696
697 /// Returns the protobuf produced by a YARA module after processing the
698 /// data.
699 ///
700 /// The result will be `None` if the module doesn't exist or didn't
701 /// produce any output.
702 pub fn module_output(
703 &self,
704 module_name: &str,
705 ) -> Option<&'a dyn MessageDyn> {
706 let module = BUILTIN_MODULES.get(module_name)?;
707 let module_output = self
708 .ctx
709 .module_outputs
710 .get(module.root_struct_descriptor.full_name())?
711 .as_ref();
712 Some(module_output)
713 }
714
715 /// Returns an iterator that yields tuples composed of a YARA module name
716 /// and the protobuf produced by that module.
717 ///
718 /// Only returns the modules that produced some output.
719 pub fn module_outputs(&self) -> ModuleOutputs<'a, 'r> {
720 ModuleOutputs::new(self.ctx)
721 }
722}
723
/// Iterator that yields the rules that matched during a scan.
///
/// Private rules are not included by default, use
/// [`MatchingRules::include_private`] for changing this behaviour.
pub struct MatchingRules<'a, 'r> {
    /// Scan context the matching rules are read from.
    ctx: &'a ScanContext<'r, 'a>,
    /// Iterator over the IDs of the matching rules, both private and
    /// non-private.
    iterator: Iter<'a, RuleId>,
    /// Number of non-private rules not visited yet.
    len_non_private: usize,
    /// Number of private rules not visited yet.
    len_private: usize,
    /// Whether private rules are yielded.
    include_private: bool,
}
735
736impl<'a, 'r> MatchingRules<'a, 'r> {
737 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
738 Self {
739 ctx,
740 iterator: ctx.matching_rules.iter(),
741 include_private: false,
742 len_non_private: ctx.matching_rules.len()
743 - ctx.num_matching_private_rules,
744 len_private: ctx.num_matching_private_rules,
745 }
746 }
747
748 /// Specifies whether the iterator should yield private rules.
749 ///
750 /// This does not reset the iterator to its initial state, the iterator will
751 /// continue from its current position.
752 pub fn include_private(mut self, yes: bool) -> Self {
753 self.include_private = yes;
754 self
755 }
756}
757
758impl<'a, 'r> Iterator for MatchingRules<'a, 'r> {
759 type Item = Rule<'a, 'r>;
760
761 fn next(&mut self) -> Option<Self::Item> {
762 let rules = self.ctx.compiled_rules;
763 loop {
764 let rule_id = *self.iterator.next()?;
765 let rule_info = rules.get(rule_id);
766 if rule_info.is_private {
767 self.len_private -= 1;
768 } else {
769 self.len_non_private -= 1;
770 }
771 if self.include_private || !rule_info.is_private {
772 return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
773 }
774 }
775 }
776}
777
778impl ExactSizeIterator for MatchingRules<'_, '_> {
779 #[inline]
780 fn len(&self) -> usize {
781 if self.include_private {
782 self.len_non_private + self.len_private
783 } else {
784 self.len_non_private
785 }
786 }
787}
788
/// Iterator that yields the rules that didn't match during a scan.
///
/// Private rules are not included by default, use
/// [`NonMatchingRules::include_private`] for changing this behaviour.
pub struct NonMatchingRules<'a, 'r> {
    /// Scan context the rules are read from.
    ctx: &'a ScanContext<'r, 'a>,
    /// Iterator over the positions of the zero bits in the bitmap of
    /// matching rules. Each position is converted to a rule ID.
    iterator: bitvec::slice::IterZeros<'a, u8, Lsb0>,
    /// Whether private rules are yielded.
    include_private: bool,
    /// Number of private rules not visited yet.
    len_private: usize,
    /// Number of non-private rules not visited yet.
    len_non_private: usize,
}
800
801impl<'a, 'r> NonMatchingRules<'a, 'r> {
802 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
803 let num_rules = ctx.compiled_rules.num_rules();
804 let main_memory = ctx
805 .wasm_main_memory
806 .unwrap()
807 .data(unsafe { ctx.wasm_store.as_ref() });
808
809 let base = MATCHING_RULES_BITMAP_BASE as usize;
810
811 // Create a BitSlice that covers the region of main memory containing
812 // the bitmap that tells which rules matched and which did not.
813 let matching_rules_bitmap = BitSlice::<_, Lsb0>::from_slice(
814 &main_memory[base..base + num_rules / 8 + 1],
815 );
816
817 // The BitSlice will cover more bits than necessary, for example, if
818 // there are 3 rules the BitSlice will have 8 bits because it is
819 // created from a u8 slice that has 1 byte. Here we make sure that
820 // the BitSlice has exactly as many bits as existing rules.
821 let matching_rules_bitmap = &matching_rules_bitmap[0..num_rules];
822
823 Self {
824 ctx,
825 iterator: matching_rules_bitmap.iter_zeros(),
826 include_private: false,
827 len_non_private: ctx.compiled_rules.num_rules()
828 - ctx.matching_rules.len()
829 - ctx.num_non_matching_private_rules,
830 len_private: ctx.num_non_matching_private_rules,
831 }
832 }
833
834 /// Specifies whether the iterator should yield private rules.
835 ///
836 /// This does not reset the iterator to its initial state, the iterator will
837 /// continue from its current position.
838 pub fn include_private(mut self, yes: bool) -> Self {
839 self.include_private = yes;
840 self
841 }
842}
843
844impl<'a, 'r> Iterator for NonMatchingRules<'a, 'r> {
845 type Item = Rule<'a, 'r>;
846
847 fn next(&mut self) -> Option<Self::Item> {
848 let rules = self.ctx.compiled_rules;
849
850 loop {
851 let rule_id = RuleId::from(self.iterator.next()?);
852 let rule_info = rules.get(rule_id);
853
854 if rule_info.is_private {
855 self.len_private -= 1;
856 } else {
857 self.len_non_private -= 1;
858 }
859
860 if self.include_private || !rule_info.is_private {
861 return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
862 }
863 }
864 }
865}
866
867impl ExactSizeIterator for NonMatchingRules<'_, '_> {
868 #[inline]
869 fn len(&self) -> usize {
870 if self.include_private {
871 self.len_non_private + self.len_private
872 } else {
873 self.len_non_private
874 }
875 }
876}
877
/// Iterator that returns the outputs produced by YARA modules.
///
/// Each item is a tuple with the module's name and the protobuf message
/// produced by the module.
pub struct ModuleOutputs<'a, 'r> {
    /// Scan context the module outputs are read from.
    ctx: &'a ScanContext<'r, 'a>,
    /// Length reported by the `ExactSizeIterator` implementation.
    len: usize,
    /// Iterator over all the built-in modules, including those that didn't
    /// produce any output.
    iterator: hash_map::Iter<'a, &'a str, Module>,
}
884
885impl<'a, 'r> ModuleOutputs<'a, 'r> {
886 fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
887 Self {
888 ctx,
889 len: ctx.module_outputs.len(),
890 iterator: BUILTIN_MODULES.iter(),
891 }
892 }
893}
894
impl ExactSizeIterator for ModuleOutputs<'_, '_> {
    /// Returns the value of the `len` field, which is initialized with the
    /// number of module outputs stored in the scan context.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }
}
901
902impl<'a> Iterator for ModuleOutputs<'a, '_> {
903 type Item = (&'a str, &'a dyn MessageDyn);
904
905 fn next(&mut self) -> Option<Self::Item> {
906 loop {
907 let (name, module) = self.iterator.next()?;
908 if let Some(module_output) = self
909 .ctx
910 .module_outputs
911 .get(module.root_struct_descriptor.full_name())
912 {
913 return Some((*name, module_output.as_ref()));
914 }
915 }
916 }
917}
918
#[cfg(test)]
mod snippet_tests {
    use std::collections::BTreeMap;
    use std::ops::Range;

    use super::DataSnippets;

    /// Checks that ranges are resolved against the right snippet, and that
    /// ranges not fully contained in a single snippet produce `None`.
    #[test]
    fn snippets() {
        let snippets = DataSnippets::MultiBlock(BTreeMap::from([
            (0, vec![1, 2, 3, 4, 5, 6, 7, 8, 9]),
            (50, vec![51, 52, 53, 54]),
        ]));

        let cases: [(Range<usize>, Option<&[u8]>); 8] = [
            (0..2, Some([1, 2].as_slice())),
            (1..3, Some([2, 3].as_slice())),
            (8..9, Some([9].as_slice())),
            (9..10, None),
            (50..51, Some([51].as_slice())),
            (50..54, Some([51, 52, 53, 54].as_slice())),
            (52..54, Some([53, 54].as_slice())),
            (50..56, None),
        ];

        for (range, expected) in cases {
            assert_eq!(snippets.get(range), expected);
        }
    }
}
942}