yara_x/scanner/
mod.rs

/*! This module implements the YARA scanner.

The scanner takes the rules produced by the compiler and scans data with them.
*/
5use std::collections::{hash_map, BTreeMap, HashMap};
6use std::fmt::{Debug, Formatter};
7use std::fs;
8use std::io::Read;
9use std::mem::transmute;
10use std::ops::Range;
11use std::path::{Path, PathBuf};
12use std::pin::Pin;
13use std::slice::Iter;
14use std::sync::atomic::AtomicU64;
15use std::sync::Once;
16use std::time::Duration;
17
18use bitvec::prelude::*;
19use memmap2::{Mmap, MmapOptions};
20use protobuf::{CodedInputStream, MessageDyn};
21use thiserror::Error;
22use wasmtime::Store;
23
24use crate::compiler::{RuleId, Rules};
25use crate::models::Rule;
26use crate::modules::{Module, ModuleError, BUILTIN_MODULES};
27use crate::scanner::context::create_wasm_store_and_ctx;
28use crate::types::{Struct, TypeValue};
29use crate::variables::VariableError;
30use crate::wasm::MATCHING_RULES_BITMAP_BASE;
31use crate::{modules, Variable};
32
33pub(crate) use crate::scanner::context::RuntimeObject;
34pub(crate) use crate::scanner::context::RuntimeObjectHandle;
35pub(crate) use crate::scanner::context::ScanContext;
36pub(crate) use crate::scanner::context::ScanState;
37pub(crate) use crate::scanner::matches::Match;
38
39mod context;
40mod matches;
41
42pub mod blocks;
43
44#[cfg(test)]
45mod tests;
46
/// Error returned when a scan operation fails.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate that matches
/// on it must include a wildcard arm.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ScanError {
    /// The scan was aborted after the timeout period.
    #[error("timeout")]
    Timeout,
    /// Could not open the scanned file.
    ///
    /// The file-scanning functions also return this variant when the file
    /// was opened successfully but reading its content failed.
    #[error("can not open `{path}`: {err}")]
    OpenError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Error that occurred.
        err: std::io::Error,
    },
    /// Could not map the scanned file into memory.
    #[error("can not map `{path}`: {err}")]
    MapError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Error that occurred.
        err: std::io::Error,
    },
    /// Could not deserialize the protobuf message for some YARA module.
    ///
    /// Returned by [`Scanner::set_module_output_raw`] when the raw data
    /// can not be parsed as the module's protobuf message.
    #[error("can not deserialize protobuf message for YARA module `{module}`: {err}")]
    ProtoError {
        /// Module name.
        module: String,
        /// Error that occurred.
        err: protobuf::Error,
    },
    /// The module is unknown.
    ///
    /// Returned by [`Scanner::set_module_output`] and
    /// [`Scanner::set_module_output_raw`] when the given message or name
    /// doesn't correspond to any built-in module.
    #[error("unknown module `{module}`")]
    UnknownModule {
        /// Module name.
        module: String,
    },
    /// Some module produced an error when it was invoked.
    ///
    /// Returned by the scan functions when the main function of a module
    /// imported by the rules fails.
    #[error("error in module `{module}`: {err}")]
    ModuleError {
        /// Module name.
        module: String,
        /// Error that occurred.
        err: ModuleError,
    },
}
93
/// Global counter that gets incremented every second by a dedicated thread.
///
/// This counter is used for determining when a scan operation has timed out.
/// It starts at zero.
// NOTE(review): neither the thread nor the readers of this counter are
// visible in this file; presumably they live in a submodule — confirm.
static HEARTBEAT_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Used for spawning the thread that increments `HEARTBEAT_COUNTER`.
///
/// Being a [`Once`], whatever initialization is run through it executes at
/// most one time per process, regardless of how many scanners exist.
static INIT_HEARTBEAT: Once = Once::new();
101
102/// Represents the data being scanned.
103///
104/// The scanned data can be backed by a slice owned by someone else, or a
105/// vector or memory-mapped file owned by `ScannedData` itself.
106pub enum ScannedData<'d> {
107    Slice(&'d [u8]),
108    Vec(Vec<u8>),
109    Mmap(Mmap),
110}
111
112impl AsRef<[u8]> for ScannedData<'_> {
113    fn as_ref(&self) -> &[u8] {
114        match self {
115            ScannedData::Slice(s) => s,
116            ScannedData::Vec(v) => v.as_ref(),
117            ScannedData::Mmap(m) => m.as_ref(),
118        }
119    }
120}
121
122impl<'d> TryInto<ScannedData<'d>> for &'d [u8] {
123    type Error = ScanError;
124    fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
125        Ok(ScannedData::Slice(self))
126    }
127}
128
129impl<'d, const N: usize> TryInto<ScannedData<'d>> for &'d [u8; N] {
130    type Error = ScanError;
131    fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
132        Ok(ScannedData::Slice(self))
133    }
134}
135
/// Contains information about the time spent on a rule.
///
/// Values of this type are returned by [`Scanner::slowest_rules`]. The type
/// only exists when the `rules-profiling` feature is enabled.
#[cfg(feature = "rules-profiling")]
pub struct ProfilingData<'r> {
    /// Rule namespace.
    pub namespace: &'r str,
    /// Rule name.
    pub rule: &'r str,
    /// Time spent executing the rule's condition.
    pub condition_exec_time: Duration,
    /// Time spent matching the rule's patterns.
    pub pattern_matching_time: Duration,
}
148
/// Extra, optional inputs for a scan operation.
///
/// Instances are built by chaining calls, starting from
/// [`ScanOptions::new`].
#[derive(Debug, Default)]
pub struct ScanOptions<'a> {
    /// Metadata supplied for YARA modules, keyed by module name.
    module_metadata: HashMap<&'a str, &'a [u8]>,
}

impl<'a> ScanOptions<'a> {
    /// Returns a `ScanOptions` that carries no additional information for
    /// the scan operation.
    ///
    /// Additional information is attached with the other methods.
    pub fn new() -> Self {
        Self::default()
    }

    /// Attaches `metadata` to the YARA module named `module_name`.
    ///
    /// Metadata set earlier for the same module is replaced.
    pub fn set_module_metadata(
        self,
        module_name: &'a str,
        metadata: &'a [u8],
    ) -> Self {
        let Self { mut module_metadata } = self;
        module_metadata.insert(module_name, metadata);
        Self { module_metadata }
    }
}
174
/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled [`Rules`] and scans data with those
/// rules. The same scanner can be used for scanning multiple files or
/// in-memory data sequentially, but you need multiple scanners for scanning in
/// parallel.
pub struct Scanner<'r> {
    // The rules this scanner was created with. The field is not read in this
    // file; presumably it only ties the scanner to the `'r` lifetime — confirm.
    _rules: &'r Rules,
    // WASM store that owns the `ScanContext`. The context is declared with
    // `'static` lifetimes here; `scan_context`/`scan_context_mut` transmute
    // them to the real lifetimes when handing out references.
    wasm_store: Pin<Box<Store<ScanContext<'static, 'static>>>>,
    // Whether file scans are allowed to memory-map their input (see
    // `Scanner::use_mmap`).
    use_mmap: bool,
}
186
187impl<'r> Scanner<'r> {
188    /// Creates a new scanner.
189    pub fn new(rules: &'r Rules) -> Self {
190        let wasm_store = create_wasm_store_and_ctx(rules);
191        Self { _rules: rules, wasm_store, use_mmap: true }
192    }
193
194    /// Sets a timeout for scan operations.
195    ///
196    /// The scan functions will return an [ScanError::Timeout] once the
197    /// provided timeout duration has elapsed. The scanner will make every
198    /// effort to stop promptly after the designated timeout duration. However,
199    /// in some cases, particularly with rules containing only a few patterns,
200    /// the scanner could potentially continue running for a longer period than
201    /// the specified timeout.
202    pub fn set_timeout(&mut self, timeout: Duration) -> &mut Self {
203        self.scan_context_mut().set_timeout(timeout);
204        self
205    }
206
207    /// Sets the maximum number of matches per pattern.
208    ///
209    /// When some pattern reaches the maximum number of patterns it won't
210    /// produce more matches.
211    pub fn max_matches_per_pattern(&mut self, n: usize) -> &mut Self {
212        self.scan_context_mut().pattern_matches.max_matches_per_pattern(n);
213        self
214    }
215
    /// Specifies whether [`Scanner::scan_file`] and [`Scanner::scan_file_with_options`]
    /// may use memory-mapped files to read input.
    ///
    /// By default, the scanner uses memory mapping for very large files, as this
    /// is typically faster than copying file contents into memory. However, this
    /// approach has a drawback: if another process truncates the file during
    /// scanning, a `SIGBUS` signal may occur.
    ///
    /// Passing `false` disables memory mapping and forces the scanner to
    /// always read files into an in-memory buffer instead. This method is slower,
    /// but safer. Passing `true` restores the default behaviour.
    pub fn use_mmap(&mut self, yes: bool) -> &mut Self {
        self.use_mmap = yes;
        self
    }
231
232    /// Sets a callback that is invoked every time a YARA rule calls the
233    /// `console` module.
234    ///
235    /// The `callback` function is invoked with a string representing the
236    /// message being logged. The function can print the message to stdout,
237    /// append it to a file, etc. If no callback is set these messages are
238    /// ignored.
239    pub fn console_log<F>(&mut self, callback: F) -> &mut Self
240    where
241        F: FnMut(String) + 'r,
242    {
243        self.scan_context_mut().console_log = Some(Box::new(callback));
244        self
245    }
246
247    /// Scans in-memory data.
248    pub fn scan<'a>(
249        &'a mut self,
250        data: &'a [u8],
251    ) -> Result<ScanResults<'a, 'r>, ScanError> {
252        self.scan_impl(data.try_into()?, None)
253    }
254
255    /// Scans a file.
256    pub fn scan_file<'a, P>(
257        &'a mut self,
258        target: P,
259    ) -> Result<ScanResults<'a, 'r>, ScanError>
260    where
261        P: AsRef<Path>,
262    {
263        self.scan_impl(self.load_file(target.as_ref())?, None)
264    }
265
266    /// Like [`Scanner::scan`], but allows to specify additional scan options.
267    pub fn scan_with_options<'a, 'opts>(
268        &'a mut self,
269        data: &'a [u8],
270        options: ScanOptions<'opts>,
271    ) -> Result<ScanResults<'a, 'r>, ScanError> {
272        self.scan_impl(ScannedData::Slice(data), Some(options))
273    }
274
275    /// Like [`Scanner::scan_file`], but allows to specify additional scan
276    /// options.
277    pub fn scan_file_with_options<'opts, P>(
278        &mut self,
279        target: P,
280        options: ScanOptions<'opts>,
281    ) -> Result<ScanResults<'_, 'r>, ScanError>
282    where
283        P: AsRef<Path>,
284    {
285        self.scan_impl(self.load_file(target.as_ref())?, Some(options))
286    }
287
288    /// Sets the value of a global variable.
289    ///
290    /// The variable must has been previously defined by calling
291    /// [`crate::Compiler::define_global`], and the type it has during the
292    /// definition must match the type of the new value (`T`).
293    ///
294    /// The variable will retain the new value in subsequent scans, unless this
295    /// function is called again for setting a new value.
296    pub fn set_global<T: TryInto<Variable>>(
297        &mut self,
298        ident: &str,
299        value: T,
300    ) -> Result<&mut Self, VariableError>
301    where
302        VariableError: From<<T as TryInto<Variable>>::Error>,
303    {
304        self.scan_context_mut().set_global(ident, value)?;
305        Ok(self)
306    }
307
308    /// Sets the output data for a YARA module.
309    ///
310    /// Each YARA module generates an output consisting of a data structure that
311    /// contains information about the scanned file. This data structure is
312    /// represented by a Protocol Buffer message. Typically, you won't need to
313    /// provide this data yourself, as the YARA module automatically generates
314    /// different outputs for each file it scans.
315    ///
316    /// However, there are two scenarios in which you may want to provide the
317    /// output for a module yourself:
318    ///
319    /// 1) When the module does not produce any output on its own.
320    /// 2) When you already know the output of the module for the upcoming file
321    ///    to be scanned, and you prefer to reuse this data instead of generating
322    ///    it again.
323    ///
324    /// Case 1) applies to certain modules lacking a main function, thus
325    /// incapable of producing any output on their own. For such modules, you
326    /// must set the output before scanning the associated data. Since the
327    /// module's output typically varies with each scanned file, you need to
328    /// call [`Scanner::set_module_output`] prior to each invocation of
329    /// [`Scanner::scan`]. Once [`Scanner::scan`] is executed, the module's
330    /// output is consumed and will be empty unless set again before the
331    /// subsequent call.
332    ///
333    /// Case 2) applies when you have previously stored the module's output for
334    /// certain scanned data. In such cases, when rescanning the data, you can
335    /// utilize this function to supply the module's output, thereby preventing
336    /// redundant computation by the module. This optimization enhances
337    /// performance by eliminating the need for the module to reparse the
338    /// scanned data.
339    ///
340    /// <br>
341    ///
342    /// The `data` argument must be a Protocol Buffer message corresponding
343    /// to any of the existing YARA modules.
344    pub fn set_module_output(
345        &mut self,
346        data: Box<dyn MessageDyn>,
347    ) -> Result<&mut Self, ScanError> {
348        let descriptor = data.descriptor_dyn();
349        let full_name = descriptor.full_name();
350
351        // Check if the protobuf message passed to this function corresponds
352        // with any of the existing modules.
353        if !BUILTIN_MODULES
354            .iter()
355            .any(|m| m.1.root_struct_descriptor.full_name() == full_name)
356        {
357            return Err(ScanError::UnknownModule {
358                module: full_name.to_string(),
359            });
360        }
361
362        self.scan_context_mut()
363            .user_provided_module_outputs
364            .insert(full_name.to_string(), data);
365
366        Ok(self)
367    }
368
369    /// Similar to [`Scanner::set_module_output`], but receives a module name
370    /// and the protobuf message as raw data.
371    ///
372    /// `name` can be either the YARA module name (i.e: "pe", "elf", "dotnet",
373    /// etc.) or the fully-qualified name for the protobuf message associated
374    /// to the module (i.e: "pe.PE", "elf.ELF", "dotnet.Dotnet", etc.).
375    pub fn set_module_output_raw(
376        &mut self,
377        name: &str,
378        data: &[u8],
379    ) -> Result<&mut Self, ScanError> {
380        // Try to find the module by name first, if not found, then try
381        // to find a module where the fully-qualified name for its protobuf
382        // message matches the `name` arguments.
383        let descriptor = if let Some(module) = BUILTIN_MODULES.get(name) {
384            Some(&module.root_struct_descriptor)
385        } else {
386            BUILTIN_MODULES.values().find_map(|module| {
387                if module.root_struct_descriptor.full_name() == name {
388                    Some(&module.root_struct_descriptor)
389                } else {
390                    None
391                }
392            })
393        };
394
395        if descriptor.is_none() {
396            return Err(ScanError::UnknownModule { module: name.to_string() });
397        }
398
399        let mut is = CodedInputStream::from_bytes(data);
400
401        // Default recursion limit is 100, that's not enough for some deeply
402        // nested structures like the process tree in the `vt` module.
403        is.set_recursion_limit(500);
404
405        self.set_module_output(
406            descriptor.unwrap().parse_from(&mut is).map_err(|err| {
407                ScanError::ProtoError { module: name.to_string(), err }
408            })?,
409        )
410    }
411
412    /// Returns profiling data for the slowest N rules.
413    ///
414    /// The profiling data reflects the cumulative execution time of each rule
415    /// across all scanned files. This information is useful for identifying
416    /// performance bottlenecks. To reset the profiling data and start fresh
417    /// for subsequent scans, use [`Scanner::clear_profiling_data`].
418    #[cfg(feature = "rules-profiling")]
419    pub fn slowest_rules(&self, n: usize) -> Vec<ProfilingData<'_>> {
420        self.scan_context().slowest_rules(n)
421    }
422
423    /// Clears all accumulated profiling data.
424    ///
425    /// This method resets the profiling data collected during rule execution
426    /// across scanned files. Use this to start a new profiling session, ensuring
427    /// the results reflect only the data gathered after this method is called.
428    #[cfg(feature = "rules-profiling")]
429    pub fn clear_profiling_data(&mut self) {
430        self.scan_context_mut().clear_profiling_data()
431    }
432}
433
434impl<'r> Scanner<'r> {
    /// Returns a shared reference to the [`ScanContext`] stored in the WASM
    /// store.
    ///
    /// The store declares the context as `ScanContext<'static, 'static>`;
    /// this function reinterprets those lifetimes as `'r` and `'a`. Note
    /// that `'a` is not tied to any argument, so it is chosen by the caller.
    #[cfg(feature = "rules-profiling")]
    #[inline]
    fn scan_context<'a>(&self) -> &ScanContext<'r, 'a> {
        // NOTE(review): the transmute only changes lifetime parameters.
        // Soundness presumably relies on the context never holding data
        // that outlives the rules (`'r`) or the scanned data — confirm.
        unsafe {
            transmute::<&ScanContext<'static, 'static>, &ScanContext<'r, '_>>(
                self.wasm_store.data(),
            )
        }
    }
    /// Returns a mutable reference to the [`ScanContext`] stored in the
    /// WASM store.
    ///
    /// The store declares the context as `ScanContext<'static, 'static>`;
    /// this function reinterprets those lifetimes as `'r` and `'a`. Note
    /// that `'a` is not tied to any argument, so it is chosen by the caller.
    #[inline]
    fn scan_context_mut<'a>(&mut self) -> &mut ScanContext<'r, 'a> {
        // NOTE(review): the transmute only changes lifetime parameters.
        // Soundness presumably relies on the context never holding data
        // that outlives the rules (`'r`) or the scanned data — confirm.
        unsafe {
            transmute::<
                &mut ScanContext<'static, 'static>,
                &mut ScanContext<'r, '_>,
            >(self.wasm_store.data_mut())
        }
    }
453
454    fn load_file<'a>(
455        &self,
456        path: &Path,
457    ) -> Result<ScannedData<'a>, ScanError> {
458        let mut file = fs::File::open(path).map_err(|err| {
459            ScanError::OpenError { path: path.to_path_buf(), err }
460        })?;
461
462        let size = file.metadata().map(|m| m.len()).unwrap_or(0);
463
464        let mut buffered_file;
465        let mapped_file;
466
467        // For files smaller than ~500MB reading the whole file is faster than
468        // using a memory-mapped file.
469        let data = if self.use_mmap && size > 500_000_000 {
470            mapped_file = unsafe {
471                MmapOptions::new().map_copy_read_only(&file).map_err(|err| {
472                    ScanError::MapError { path: path.to_path_buf(), err }
473                })
474            }?;
475            ScannedData::Mmap(mapped_file)
476        } else {
477            buffered_file = Vec::with_capacity(size as usize);
478            file.read_to_end(&mut buffered_file).map_err(|err| {
479                ScanError::OpenError { path: path.to_path_buf(), err }
480            })?;
481            ScannedData::Vec(buffered_file)
482        };
483
484        Ok(data)
485    }
486
    /// Implements every public scan function.
    ///
    /// The steps are:
    ///
    /// 1. Reset the scan context, set `filesize`, and store `data` in the
    ///    context's scan state.
    /// 2. For each module imported by the rules, obtain its output (from
    ///    the user-provided outputs, or by calling the module's main
    ///    function with the metadata found in `options`, if any), convert it
    ///    to a structure and add it to the root structure.
    /// 3. Evaluate the rule conditions.
    /// 4. Move the scanned data into the `Finished` state and return the
    ///    results.
    ///
    /// # Errors
    ///
    /// Returns [`ScanError::ModuleError`] if the main function of some
    /// module fails, or the error returned while evaluating the conditions.
    ///
    /// # Panics
    ///
    /// Panics if the rules import a module that is not among the built-in
    /// modules.
    fn scan_impl<'a, 'opts>(
        &'a mut self,
        data: ScannedData<'a>,
        options: Option<ScanOptions<'opts>>,
    ) -> Result<ScanResults<'a, 'r>, ScanError> {
        let ctx = self.scan_context_mut();

        // Clear information about matches found in a previous scan, if any.
        ctx.reset();

        // Set the global variable `filesize` to the size of the scanned data.
        ctx.set_filesize(data.as_ref().len() as i64);

        // Indicate that the scanner is currently scanning the given data.
        ctx.scan_state = ScanState::ScanningData(data);

        for module_name in ctx.compiled_rules.imports() {
            // Lookup the module in the list of built-in modules.
            let module = modules::BUILTIN_MODULES
                .get(module_name)
                .unwrap_or_else(|| panic!("module `{module_name}` not found"));

            let root_struct_name = module.root_struct_descriptor.full_name();

            let module_output;
            // If the user already provided some output for the module by
            // calling `Scanner::set_module_output`, use that output. If not,
            // call the module's main function (if the module has a main
            // function) for getting its output.
            if let Some(output) =
                ctx.user_provided_module_outputs.remove(root_struct_name)
            {
                module_output = Some(output);
            } else {
                // Metadata for this module, if the caller supplied any
                // through the scan options.
                let meta: Option<&'opts [u8]> =
                    options.as_ref().and_then(|options| {
                        options.module_metadata.get(module_name).copied()
                    });

                if let Some(main_fn) = module.main_fn {
                    module_output = Some(
                        main_fn(ctx.scanned_data().unwrap(), meta).map_err(
                            |err| ScanError::ModuleError {
                                module: module_name.to_string(),
                                err,
                            },
                        )?,
                    );
                } else {
                    module_output = None;
                }
            }

            if let Some(module_output) = &module_output {
                // Make sure that the module is returning a protobuf message of
                // the expected type.
                debug_assert_eq!(
                    module_output.descriptor_dyn().full_name(),
                    module.root_struct_descriptor.full_name(),
                    "main function of module `{}` must return `{}`, but returned `{}`",
                    module_name,
                    module.root_struct_descriptor.full_name(),
                    module_output.descriptor_dyn().full_name(),
                );

                // Make sure that the module is returning a protobuf message
                // where all required fields are initialized. This only applies
                // to proto2, proto3 doesn't have "required" fields, all fields
                // are optional.
                debug_assert!(
                    module_output.is_initialized_dyn(),
                    "module `{}` returned a protobuf `{}` where some required fields are not initialized ",
                    module_name,
                    module.root_struct_descriptor.full_name()
                );
            }

            // When constant folding is enabled we don't need to generate
            // structure fields for enums. This is because during the
            // optimization process symbols like MyEnum.ENUM_ITEM are resolved
            // to their constant values at compile time. In other words, the
            // compiler determines that MyEnum.ENUM_ITEM is equal to some value
            // X, and uses that value in the generated code.
            //
            // However, without constant folding, enums are treated as any
            // other field in a struct, and their values are determined at scan
            // time. For that reason these fields must be generated for enums
            // when constant folding is disabled.
            let generate_fields_for_enums =
                !cfg!(feature = "constant-folding");

            let module_struct = Struct::from_proto_descriptor_and_msg(
                &module.root_struct_descriptor,
                module_output.as_deref(),
                generate_fields_for_enums,
            );

            // Keep the module's output, so that it can be retrieved from
            // the scan results.
            if let Some(module_output) = module_output {
                ctx.module_outputs
                    .insert(root_struct_name.to_string(), module_output);
            }

            // The data structure obtained from the module is added to the
            // root structure. Any data from previous scans will be replaced
            // with the new data structure.
            ctx.root_struct
                .add_field(module_name, TypeValue::Struct(module_struct));
        }

        // The user provided module outputs are not needed anymore. Let's
        // clear any remaining entry in the hash map (which can happen if
        // the user has set outputs for modules that are not even imported
        // by the rules).
        ctx.user_provided_module_outputs.clear();

        // Evaluate the conditions of every rule, this will call
        // `ScanContext::search_for_patterns` if necessary.
        ctx.eval_conditions()?;

        // Take the scanned data back from the scan state. The state was set
        // to `ScanningData` at the beginning of this function.
        let data = match ctx.scan_state.take() {
            ScanState::ScanningData(data) => data,
            _ => unreachable!(),
        };

        // The scan is finished, but the data is kept, as the scan results
        // may need it for returning the data that matched.
        ctx.scan_state = ScanState::Finished(DataSnippets::SingleBlock(data));

        Ok(ScanResults::new(ctx))
    }
615}
616
617/// Helper type that exposes the data matched during a scan operation.
618///
619/// Matching data can be accessed through the [`Match::data`] method. Normally,
620/// this data can be retrieved by slicing directly into the scanned input.
621/// However, that requires the original input to remain valid until the scan
622/// results are processed. This works fine for a single contiguous block of
623/// memory, but is impractical when scanning multiple blocks, since holding
624/// onto all of them until the end would consume excessive memory.
625///
626/// To handle this, two strategies are used:
627///
628/// - **Single-block scans**: Data is accessed directly from the input slice.
629/// - **Multi-block scans**: Matching fragments are copied and retained in a
630///   BTreeMap until the results are processed. The keys in the btree are
631///   the offsets where the snippets start and the values are vectors with
632///   the snippet's data.
633///
634/// Each strategy corresponds to a variant in this enum.
635pub(crate) enum DataSnippets<'d> {
636    SingleBlock(ScannedData<'d>),
637    MultiBlock(BTreeMap<usize, Vec<u8>>),
638}
639
640impl DataSnippets<'_> {
641    pub(crate) fn get(&self, range: Range<usize>) -> Option<&[u8]> {
642        match self {
643            Self::SingleBlock(data) => data.as_ref().get(range),
644            Self::MultiBlock(btree) => {
645                // Find in the btree the snippet that starts exactly at the
646                // offset indicated by range.start, if not found, take the
647                // previous one, which may also contain the requested range.
648                let (snippet_offset, snippet_data) =
649                    btree.range(..=range.start).next_back()?;
650
651                // Calculate the start and end of the slice within the snippet.
652                let start = range.start - snippet_offset;
653                let end = range.end - snippet_offset;
654
655                // Returns the data, or `None` if `start` and `end` are not
656                // within the snippet boundaries.
657                snippet_data.get(start..end)
658            }
659        }
660    }
661}
662
663/// Results of a scan operation.
664///
665/// Allows iterating over both the matching and non-matching rules.
666pub struct ScanResults<'a, 'r> {
667    ctx: &'a ScanContext<'r, 'a>,
668}
669
670impl Debug for ScanResults<'_, '_> {
671    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
672        f.write_str("ScanResults")
673    }
674}
675
676impl<'a, 'r> ScanResults<'a, 'r> {
677    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
678        Self { ctx }
679    }
680
681    /// Returns an iterator that yields the matching rules in arbitrary order.
682    pub fn matching_rules(&self) -> MatchingRules<'_, 'r> {
683        MatchingRules::new(self.ctx)
684    }
685
686    /// Returns an iterator that yields the non-matching rules in arbitrary
687    /// order.
688    pub fn non_matching_rules(&self) -> NonMatchingRules<'_, 'r> {
689        NonMatchingRules::new(self.ctx)
690    }
691
692    /// Returns the protobuf produced by a YARA module after processing the
693    /// data.
694    ///
695    /// The result will be `None` if the module doesn't exist or didn't
696    /// produce any output.
697    pub fn module_output(
698        &self,
699        module_name: &str,
700    ) -> Option<&'a dyn MessageDyn> {
701        let module = BUILTIN_MODULES.get(module_name)?;
702        let module_output = self
703            .ctx
704            .module_outputs
705            .get(module.root_struct_descriptor.full_name())?
706            .as_ref();
707        Some(module_output)
708    }
709
710    /// Returns an iterator that yields tuples composed of a YARA module name
711    /// and the protobuf produced by that module.
712    ///
713    /// Only returns the modules that produced some output.
714    pub fn module_outputs(&self) -> ModuleOutputs<'a, 'r> {
715        ModuleOutputs::new(self.ctx)
716    }
717}
718
719/// Iterator that yields the rules that matched during a scan.
720///
721/// Private rules are not included by default, use
722/// [`MatchingRules::include_private`] for changing this behaviour.
723pub struct MatchingRules<'a, 'r> {
724    ctx: &'a ScanContext<'r, 'a>,
725    iterator: Iter<'a, RuleId>,
726    len_non_private: usize,
727    len_private: usize,
728    include_private: bool,
729}
730
731impl<'a, 'r> MatchingRules<'a, 'r> {
732    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
733        Self {
734            ctx,
735            iterator: ctx.matching_rules.iter(),
736            include_private: false,
737            len_non_private: ctx.matching_rules.len()
738                - ctx.num_matching_private_rules,
739            len_private: ctx.num_matching_private_rules,
740        }
741    }
742
743    /// Specifies whether the iterator should yield private rules.
744    ///
745    /// This does not reset the iterator to its initial state, the iterator will
746    /// continue from its current position.
747    pub fn include_private(mut self, yes: bool) -> Self {
748        self.include_private = yes;
749        self
750    }
751}
752
753impl<'a, 'r> Iterator for MatchingRules<'a, 'r> {
754    type Item = Rule<'a, 'r>;
755
756    fn next(&mut self) -> Option<Self::Item> {
757        let rules = self.ctx.compiled_rules;
758        loop {
759            let rule_id = *self.iterator.next()?;
760            let rule_info = rules.get(rule_id);
761            if rule_info.is_private {
762                self.len_private -= 1;
763            } else {
764                self.len_non_private -= 1;
765            }
766            if self.include_private || !rule_info.is_private {
767                return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
768            }
769        }
770    }
771}
772
773impl ExactSizeIterator for MatchingRules<'_, '_> {
774    #[inline]
775    fn len(&self) -> usize {
776        if self.include_private {
777            self.len_non_private + self.len_private
778        } else {
779            self.len_non_private
780        }
781    }
782}
783
784/// Iterator that yields the rules that didn't match during a scan.
785///
786/// Private rules are not included by default, use
787/// [`NonMatchingRules::include_private`] for changing this behaviour.
788pub struct NonMatchingRules<'a, 'r> {
789    ctx: &'a ScanContext<'r, 'a>,
790    iterator: bitvec::slice::IterZeros<'a, u8, Lsb0>,
791    include_private: bool,
792    len_private: usize,
793    len_non_private: usize,
794}
795
796impl<'a, 'r> NonMatchingRules<'a, 'r> {
797    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
798        let num_rules = ctx.compiled_rules.num_rules();
799        let main_memory = ctx
800            .wasm_main_memory
801            .unwrap()
802            .data(unsafe { ctx.wasm_store.as_ref() });
803
804        let base = MATCHING_RULES_BITMAP_BASE as usize;
805
806        // Create a BitSlice that covers the region of main memory containing
807        // the bitmap that tells which rules matched and which did not.
808        let matching_rules_bitmap = BitSlice::<_, Lsb0>::from_slice(
809            &main_memory[base..base + num_rules / 8 + 1],
810        );
811
812        // The BitSlice will cover more bits than necessary, for example, if
813        // there are 3 rules the BitSlice will have 8 bits because it is
814        // created from a u8 slice that has 1 byte. Here we make sure that
815        // the BitSlice has exactly as many bits as existing rules.
816        let matching_rules_bitmap = &matching_rules_bitmap[0..num_rules];
817
818        Self {
819            ctx,
820            iterator: matching_rules_bitmap.iter_zeros(),
821            include_private: false,
822            len_non_private: ctx.compiled_rules.num_rules()
823                - ctx.matching_rules.len()
824                - ctx.num_non_matching_private_rules,
825            len_private: ctx.num_non_matching_private_rules,
826        }
827    }
828
829    /// Specifies whether the iterator should yield private rules.
830    ///
831    /// This does not reset the iterator to its initial state, the iterator will
832    /// continue from its current position.
833    pub fn include_private(mut self, yes: bool) -> Self {
834        self.include_private = yes;
835        self
836    }
837}
838
839impl<'a, 'r> Iterator for NonMatchingRules<'a, 'r> {
840    type Item = Rule<'a, 'r>;
841
842    fn next(&mut self) -> Option<Self::Item> {
843        let rules = self.ctx.compiled_rules;
844
845        loop {
846            let rule_id = RuleId::from(self.iterator.next()?);
847            let rule_info = rules.get(rule_id);
848
849            if rule_info.is_private {
850                self.len_private -= 1;
851            } else {
852                self.len_non_private -= 1;
853            }
854
855            if self.include_private || !rule_info.is_private {
856                return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
857            }
858        }
859    }
860}
861
862impl ExactSizeIterator for NonMatchingRules<'_, '_> {
863    #[inline]
864    fn len(&self) -> usize {
865        if self.include_private {
866            self.len_non_private + self.len_private
867        } else {
868            self.len_non_private
869        }
870    }
871}
872
873/// Iterator that returns the outputs produced by YARA modules.
874pub struct ModuleOutputs<'a, 'r> {
875    ctx: &'a ScanContext<'r, 'a>,
876    len: usize,
877    iterator: hash_map::Iter<'a, &'a str, Module>,
878}
879
880impl<'a, 'r> ModuleOutputs<'a, 'r> {
881    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
882        Self {
883            ctx,
884            len: ctx.module_outputs.len(),
885            iterator: BUILTIN_MODULES.iter(),
886        }
887    }
888}
889
890impl ExactSizeIterator for ModuleOutputs<'_, '_> {
891    #[inline]
892    fn len(&self) -> usize {
893        self.len
894    }
895}
896
897impl<'a> Iterator for ModuleOutputs<'a, '_> {
898    type Item = (&'a str, &'a dyn MessageDyn);
899
900    fn next(&mut self) -> Option<Self::Item> {
901        loop {
902            let (name, module) = self.iterator.next()?;
903            if let Some(module_output) = self
904                .ctx
905                .module_outputs
906                .get(module.root_struct_descriptor.full_name())
907            {
908                return Some((*name, module_output.as_ref()));
909            }
910        }
911    }
912}
913
#[cfg(test)]
mod snippet_tests {
    use super::DataSnippets;
    use std::collections::BTreeMap;
    use std::ops::Range;

    /// Checks that multi-block snippets return the requested data only when
    /// the range is completely inside one of the stored snippets.
    #[test]
    fn snippets() {
        // Two snippets: one that covers offsets 0..9 and another one that
        // covers offsets 50..54.
        let snippets = DataSnippets::MultiBlock(BTreeMap::from([
            (0, vec![1, 2, 3, 4, 5, 6, 7, 8, 9]),
            (50, vec![51, 52, 53, 54]),
        ]));

        let lookup =
            |range: Range<usize>| snippets.get(range).map(<[u8]>::to_vec);

        // Ranges inside the first snippet.
        assert_eq!(lookup(0..2), Some(vec![1, 2]));
        assert_eq!(lookup(1..3), Some(vec![2, 3]));
        assert_eq!(lookup(8..9), Some(vec![9]));
        // Range that starts right after the end of the first snippet.
        assert_eq!(lookup(9..10), None);
        // Ranges inside the second snippet.
        assert_eq!(lookup(50..51), Some(vec![51]));
        assert_eq!(lookup(50..54), Some(vec![51, 52, 53, 54]));
        assert_eq!(lookup(52..54), Some(vec![53, 54]));
        // Range that goes beyond the end of the second snippet.
        assert_eq!(lookup(50..56), None);
    }
}