Skip to main content

yara_x/scanner/
mod.rs

1/*! This module implements the YARA scanner.
2
The scanner takes the rules produced by the compiler and scans data with them.
4*/
5use std::collections::{BTreeMap, HashMap, hash_map};
6use std::fmt::{Debug, Formatter};
7use std::fs;
8use std::io::Read;
9use std::mem::transmute;
10use std::ops::Range;
11use std::path::{Path, PathBuf};
12use std::pin::Pin;
13use std::slice::Iter;
14use std::sync::Once;
15use std::sync::atomic::AtomicU64;
16use std::time::Duration;
17
18use bitvec::prelude::*;
19use memmap2::{Mmap, MmapOptions};
20use protobuf::{CodedInputStream, MessageDyn};
21use thiserror::Error;
22
23use crate::compiler::{RuleId, Rules};
24use crate::models::Rule;
25use crate::modules::{BUILTIN_MODULES, Module, ModuleError};
26use crate::scanner::context::create_wasm_store_and_ctx;
27use crate::types::{Struct, TypeValue};
28use crate::variables::VariableError;
29use crate::wasm::MATCHING_RULES_BITMAP_BASE;
30use crate::wasm::runtime::Store;
31use crate::{Variable, modules};
32
33pub(crate) use crate::scanner::context::RuntimeObject;
34pub(crate) use crate::scanner::context::RuntimeObjectHandle;
35pub(crate) use crate::scanner::context::ScanContext;
36pub(crate) use crate::scanner::context::ScanState;
37pub(crate) use crate::scanner::matches::Match;
38
39mod context;
40mod matches;
41
42pub mod blocks;
43
44#[cfg(test)]
45mod tests;
46
/// Error returned when a scan operation fails.
///
/// The enum is `#[non_exhaustive]`, so code matching on it from other crates
/// must include a wildcard arm.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ScanError {
    /// The scan was aborted after the timeout period.
    #[error("timeout")]
    Timeout,
    /// Could not open the scanned file.
    ///
    /// `Scanner::load_file` also uses this variant when reading the contents
    /// of an already opened file fails.
    #[error("can not open `{path}`: {err}")]
    OpenError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Underlying I/O error.
        err: std::io::Error,
    },
    /// Could not map the scanned file into memory.
    #[error("can not map `{path}`: {err}")]
    MapError {
        /// Path of the file being scanned.
        path: PathBuf,
        /// Underlying I/O error.
        err: std::io::Error,
    },
    /// Could not deserialize the protobuf message for some YARA module.
    #[error(
        "can not deserialize protobuf message for YARA module `{module}`: {err}"
    )]
    ProtoError {
        /// Module name.
        module: String,
        /// Error reported by the protobuf parser.
        err: protobuf::Error,
    },
    /// The module is unknown.
    #[error("unknown module `{module}`")]
    UnknownModule {
        /// Module name (or protobuf message name) that was not recognized.
        module: String,
    },
    /// Some module produced an error when it was invoked.
    #[error("error in module `{module}`: {err}")]
    ModuleError {
        /// Module name.
        module: String,
        /// Error returned by the module's main function.
        err: ModuleError,
    },
}
95
/// Global counter that gets incremented once per second by a dedicated
/// thread.
///
/// This counter is used for determining when a scan operation has timed out.
// NOTE(review): neither the increment nor the timeout check is visible in
// this file; presumably both live in a submodule — confirm.
static HEARTBEAT_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Used for spawning the thread that increments `HEARTBEAT_COUNTER`.
///
/// Being a [`Once`], it allows that initialization to run a single time.
static INIT_HEARTBEAT: Once = Once::new();
103
/// Represents the data being scanned.
///
/// The scanned data can be backed by a slice owned by someone else, or a
/// vector or memory-mapped file owned by `ScannedData` itself. Whatever the
/// backing storage, the bytes are reachable through `AsRef<[u8]>`.
pub enum ScannedData<'d> {
    /// Bytes borrowed from a slice owned by someone else.
    Slice(&'d [u8]),
    /// Bytes stored in a vector owned by this value.
    Vec(Vec<u8>),
    /// Bytes backed by a memory-mapped file owned by this value.
    Mmap(Mmap),
}
113
114impl AsRef<[u8]> for ScannedData<'_> {
115    fn as_ref(&self) -> &[u8] {
116        match self {
117            ScannedData::Slice(s) => s,
118            ScannedData::Vec(v) => v.as_ref(),
119            ScannedData::Mmap(m) => m.as_ref(),
120        }
121    }
122}
123
124impl<'d> TryInto<ScannedData<'d>> for &'d [u8] {
125    type Error = ScanError;
126    fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
127        Ok(ScannedData::Slice(self))
128    }
129}
130
131impl<'d, const N: usize> TryInto<ScannedData<'d>> for &'d [u8; N] {
132    type Error = ScanError;
133    fn try_into(self) -> Result<ScannedData<'d>, Self::Error> {
134        Ok(ScannedData::Slice(self))
135    }
136}
137
/// Contains information about the time spent on a rule.
///
/// Only available when the `rules-profiling` feature is enabled. Values of
/// this type are returned by `Scanner::slowest_rules`.
#[cfg(feature = "rules-profiling")]
pub struct ProfilingData<'r> {
    /// Rule namespace.
    pub namespace: &'r str,
    /// Rule name.
    pub rule: &'r str,
    /// Time spent executing the rule's condition.
    pub condition_exec_time: Duration,
    /// Time spent matching the rule's patterns.
    pub pattern_matching_time: Duration,
}
150
/// Optional, per-scan information that can be handed to the scanner.
///
/// Values are built in a chained fashion: start with [`ScanOptions::new`] and
/// add information with the consuming setter methods.
#[derive(Debug, Default)]
pub struct ScanOptions<'a> {
    /// Metadata for YARA modules, keyed by module name.
    module_metadata: HashMap<&'a str, &'a [u8]>,
}

impl<'a> ScanOptions<'a> {
    /// Creates a `ScanOptions` that carries no additional information.
    ///
    /// Use the other methods for adding information to it.
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds metadata for the YARA module named `module_name`.
    ///
    /// Metadata previously set for the same module is replaced. The options
    /// are taken by value and returned, so calls can be chained.
    pub fn set_module_metadata(
        mut self,
        module_name: &'a str,
        metadata: &'a [u8],
    ) -> Self {
        // `insert` returns the previous entry (if any), which is not needed.
        let _replaced = self.module_metadata.insert(module_name, metadata);
        self
    }
}
176
/// Scans data with already compiled YARA rules.
///
/// The scanner receives a set of compiled [`Rules`] and scans data with those
/// rules. The same scanner can be used for scanning multiple files or
/// in-memory data sequentially, but you need multiple scanners for scanning in
/// parallel.
pub struct Scanner<'r> {
    // Compiled rules passed to `Scanner::new`. The field is never read in
    // this file. NOTE(review): presumably it only ties the scanner to the
    // `'r` lifetime of the rules — confirm.
    _rules: &'r Rules,
    // Pinned, boxed WASM store whose data is the `ScanContext`. The context
    // lifetimes are declared as `'static` here; `scan_context` and
    // `scan_context_mut` transmute them when handing out references.
    wasm_store: Pin<Box<Store<ScanContext<'static, 'static>>>>,
    // Whether `load_file` may memory-map files; `Scanner::new` sets it to
    // `true` and `Scanner::use_mmap` changes it.
    use_mmap: bool,
}
188
189impl<'r> Scanner<'r> {
190    /// Creates a new scanner.
191    pub fn new(rules: &'r Rules) -> Self {
192        let wasm_store = create_wasm_store_and_ctx(rules);
193        Self { _rules: rules, wasm_store, use_mmap: true }
194    }
195
196    /// Sets a timeout for scan operations.
197    ///
198    /// The scan functions will return an [ScanError::Timeout] once the
199    /// provided timeout duration has elapsed. The scanner will make every
200    /// effort to stop promptly after the designated timeout duration. However,
201    /// in some cases, particularly with rules containing only a few patterns,
202    /// the scanner could potentially continue running for a longer period than
203    /// the specified timeout.
204    pub fn set_timeout(&mut self, timeout: Duration) -> &mut Self {
205        self.scan_context_mut().set_timeout(timeout);
206        self
207    }
208
209    /// Sets the maximum number of matches per pattern.
210    ///
211    /// When some pattern reaches the maximum number of patterns it won't
212    /// produce more matches.
213    pub fn max_matches_per_pattern(&mut self, n: usize) -> &mut Self {
214        self.scan_context_mut().pattern_matches.max_matches_per_pattern(n);
215        self
216    }
217
    /// Specifies whether [`Scanner::scan_file`] and
    /// [`Scanner::scan_file_with_options`] may use memory-mapped files to
    /// read input.
    ///
    /// By default, the scanner uses memory mapping for very large files, as this
    /// is typically faster than copying file contents into memory. However, this
    /// approach has a drawback: if another process truncates the file during
    /// scanning, a `SIGBUS` signal may occur.
    ///
    /// Passing `false` disables memory mapping and forces the scanner to
    /// always read files into an in-memory buffer instead. This method is slower,
    /// but safer. Passing `true` restores the default behaviour.
    pub fn use_mmap(&mut self, yes: bool) -> &mut Self {
        self.use_mmap = yes;
        self
    }
233
234    /// Sets a callback that is invoked every time a YARA rule calls the
235    /// `console` module.
236    ///
237    /// The `callback` function is invoked with a string representing the
238    /// message being logged. The function can print the message to stdout,
239    /// append it to a file, etc. If no callback is set these messages are
240    /// ignored.
241    pub fn console_log<F>(&mut self, callback: F) -> &mut Self
242    where
243        F: FnMut(String) + 'r,
244    {
245        self.scan_context_mut().console_log = Some(Box::new(callback));
246        self
247    }
248
249    /// Scans in-memory data.
250    pub fn scan<'a>(
251        &'a mut self,
252        data: &'a [u8],
253    ) -> Result<ScanResults<'a, 'r>, ScanError> {
254        self.scan_impl(data.try_into()?, None)
255    }
256
257    /// Scans a file.
258    pub fn scan_file<'a, P>(
259        &'a mut self,
260        target: P,
261    ) -> Result<ScanResults<'a, 'r>, ScanError>
262    where
263        P: AsRef<Path>,
264    {
265        self.scan_impl(self.load_file(target.as_ref())?, None)
266    }
267
268    /// Like [`Scanner::scan`], but allows to specify additional scan options.
269    pub fn scan_with_options<'a, 'opts>(
270        &'a mut self,
271        data: &'a [u8],
272        options: ScanOptions<'opts>,
273    ) -> Result<ScanResults<'a, 'r>, ScanError> {
274        self.scan_impl(ScannedData::Slice(data), Some(options))
275    }
276
277    /// Like [`Scanner::scan_file`], but allows to specify additional scan
278    /// options.
279    pub fn scan_file_with_options<'opts, P>(
280        &mut self,
281        target: P,
282        options: ScanOptions<'opts>,
283    ) -> Result<ScanResults<'_, 'r>, ScanError>
284    where
285        P: AsRef<Path>,
286    {
287        self.scan_impl(self.load_file(target.as_ref())?, Some(options))
288    }
289
290    /// Sets the value of a global variable.
291    ///
292    /// The variable must has been previously defined by calling
293    /// [`crate::Compiler::define_global`], and the type it has during the
294    /// definition must match the type of the new value (`T`).
295    ///
296    /// The variable will retain the new value in subsequent scans, unless this
297    /// function is called again for setting a new value.
298    pub fn set_global<T: TryInto<Variable>>(
299        &mut self,
300        ident: &str,
301        value: T,
302    ) -> Result<&mut Self, VariableError>
303    where
304        VariableError: From<<T as TryInto<Variable>>::Error>,
305    {
306        self.scan_context_mut().set_global(ident, value)?;
307        Ok(self)
308    }
309
310    /// Sets the output data for a YARA module.
311    ///
312    /// Each YARA module generates an output consisting of a data structure that
313    /// contains information about the scanned file. This data structure is
314    /// represented by a Protocol Buffer message. Typically, you won't need to
315    /// provide this data yourself, as the YARA module automatically generates
316    /// different outputs for each file it scans.
317    ///
318    /// However, there are two scenarios in which you may want to provide the
319    /// output for a module yourself:
320    ///
321    /// 1) When the module does not produce any output on its own.
322    /// 2) When you already know the output of the module for the upcoming file
323    ///    to be scanned, and you prefer to reuse this data instead of generating
324    ///    it again.
325    ///
326    /// Case 1) applies to certain modules lacking a main function, thus
327    /// incapable of producing any output on their own. For such modules, you
328    /// must set the output before scanning the associated data. Since the
329    /// module's output typically varies with each scanned file, you need to
330    /// call [`Scanner::set_module_output`] prior to each invocation of
331    /// [`Scanner::scan`]. Once [`Scanner::scan`] is executed, the module's
332    /// output is consumed and will be empty unless set again before the
333    /// subsequent call.
334    ///
335    /// Case 2) applies when you have previously stored the module's output for
336    /// certain scanned data. In such cases, when rescanning the data, you can
337    /// utilize this function to supply the module's output, thereby preventing
338    /// redundant computation by the module. This optimization enhances
339    /// performance by eliminating the need for the module to reparse the
340    /// scanned data.
341    ///
342    /// <br>
343    ///
344    /// The `data` argument must be a Protocol Buffer message corresponding
345    /// to any of the existing YARA modules.
346    pub fn set_module_output(
347        &mut self,
348        data: Box<dyn MessageDyn>,
349    ) -> Result<&mut Self, ScanError> {
350        let descriptor = data.descriptor_dyn();
351        let full_name = descriptor.full_name();
352
353        // Check if the protobuf message passed to this function corresponds
354        // with any of the existing modules.
355        if !BUILTIN_MODULES
356            .iter()
357            .any(|m| m.1.root_struct_descriptor.full_name() == full_name)
358        {
359            return Err(ScanError::UnknownModule {
360                module: full_name.to_string(),
361            });
362        }
363
364        self.scan_context_mut()
365            .user_provided_module_outputs
366            .insert(full_name.to_string(), data);
367
368        Ok(self)
369    }
370
371    /// Similar to [`Scanner::set_module_output`], but receives a module name
372    /// and the protobuf message as raw data.
373    ///
374    /// `name` can be either the YARA module name (i.e: "pe", "elf", "dotnet",
375    /// etc.) or the fully-qualified name for the protobuf message associated
376    /// to the module (i.e: "pe.PE", "elf.ELF", "dotnet.Dotnet", etc.).
377    pub fn set_module_output_raw(
378        &mut self,
379        name: &str,
380        data: &[u8],
381    ) -> Result<&mut Self, ScanError> {
382        // Try to find the module by name first, if not found, then try
383        // to find a module where the fully-qualified name for its protobuf
384        // message matches the `name` arguments.
385        let descriptor = if let Some(module) = BUILTIN_MODULES.get(name) {
386            Some(&module.root_struct_descriptor)
387        } else {
388            BUILTIN_MODULES.values().find_map(|module| {
389                if module.root_struct_descriptor.full_name() == name {
390                    Some(&module.root_struct_descriptor)
391                } else {
392                    None
393                }
394            })
395        };
396
397        if descriptor.is_none() {
398            return Err(ScanError::UnknownModule { module: name.to_string() });
399        }
400
401        let mut is = CodedInputStream::from_bytes(data);
402
403        // Default recursion limit is 100, that's not enough for some deeply
404        // nested structures like the process tree in the `vt` module.
405        is.set_recursion_limit(500);
406
407        self.set_module_output(
408            descriptor.unwrap().parse_from(&mut is).map_err(|err| {
409                ScanError::ProtoError { module: name.to_string(), err }
410            })?,
411        )
412    }
413
414    /// Returns profiling data for the slowest N rules.
415    ///
416    /// The profiling data reflects the cumulative execution time of each rule
417    /// across all scanned files. This information is useful for identifying
418    /// performance bottlenecks. To reset the profiling data and start fresh
419    /// for subsequent scans, use [`Scanner::clear_profiling_data`].
420    #[cfg(feature = "rules-profiling")]
421    pub fn slowest_rules(&self, n: usize) -> Vec<ProfilingData<'_>> {
422        self.scan_context().slowest_rules(n)
423    }
424
425    /// Clears all accumulated profiling data.
426    ///
427    /// This method resets the profiling data collected during rule execution
428    /// across scanned files. Use this to start a new profiling session, ensuring
429    /// the results reflect only the data gathered after this method is called.
430    #[cfg(feature = "rules-profiling")]
431    pub fn clear_profiling_data(&mut self) {
432        self.scan_context_mut().clear_profiling_data()
433    }
434}
435
436impl<'r> Scanner<'r> {
    /// Returns a shared reference to the [`ScanContext`] stored as the data
    /// of the WASM store.
    ///
    /// The store declares the context as `ScanContext<'static, 'static>`;
    /// this function re-labels those lifetimes as `'r` and a lifetime chosen
    /// by the caller.
    #[cfg(feature = "rules-profiling")]
    #[inline]
    fn scan_context<'a>(&self) -> &ScanContext<'r, 'a> {
        // Only the lifetime parameters of the referenced type change in this
        // transmute.
        // NOTE(review): soundness presumably relies on the context never
        // holding data that outlives the rules (`'r`) or the scanned data —
        // confirm against `create_wasm_store_and_ctx`.
        unsafe {
            transmute::<&ScanContext<'static, 'static>, &ScanContext<'r, '_>>(
                self.wasm_store.data(),
            )
        }
    }
    /// Returns a mutable reference to the [`ScanContext`] stored as the data
    /// of the WASM store.
    ///
    /// The store declares the context as `ScanContext<'static, 'static>`;
    /// this function re-labels those lifetimes as `'r` and a lifetime chosen
    /// by the caller.
    #[inline]
    fn scan_context_mut<'a>(&mut self) -> &mut ScanContext<'r, 'a> {
        // Only the lifetime parameters of the referenced type change in this
        // transmute.
        // NOTE(review): soundness presumably relies on the context never
        // holding data that outlives the rules (`'r`) or the scanned data —
        // confirm against `create_wasm_store_and_ctx`.
        unsafe {
            transmute::<
                &mut ScanContext<'static, 'static>,
                &mut ScanContext<'r, '_>,
            >(self.wasm_store.data_mut())
        }
    }
455
456    fn load_file<'a>(
457        &self,
458        path: &Path,
459    ) -> Result<ScannedData<'a>, ScanError> {
460        let mut file = fs::File::open(path).map_err(|err| {
461            ScanError::OpenError { path: path.to_path_buf(), err }
462        })?;
463
464        let size = file.metadata().map(|m| m.len()).unwrap_or(0);
465
466        let mut buffered_file;
467        let mapped_file;
468
469        // For files smaller than ~500MB reading the whole file is faster than
470        // using a memory-mapped file.
471        let data = if self.use_mmap && size > 500_000_000 {
472            mapped_file = unsafe {
473                MmapOptions::new().map_copy_read_only(&file).map_err(|err| {
474                    ScanError::MapError { path: path.to_path_buf(), err }
475                })
476            }?;
477            ScannedData::Mmap(mapped_file)
478        } else {
479            buffered_file = Vec::with_capacity(size as usize);
480            file.read_to_end(&mut buffered_file).map_err(|err| {
481                ScanError::OpenError { path: path.to_path_buf(), err }
482            })?;
483            ScannedData::Vec(buffered_file)
484        };
485
486        Ok(data)
487    }
488
    /// Scans `data`, optionally using the additional information in
    /// `options`. All the public scan functions end up here.
    ///
    /// The steps are: reset the context, record the file size and the data
    /// being scanned, obtain the output of every module imported by the rules
    /// (either provided by the user or produced by the module's main
    /// function), evaluate the rule conditions, and finally hand the data
    /// over to the results.
    ///
    /// Returns [`ScanError::ModuleError`] if the main function of some module
    /// fails, and propagates any error returned while evaluating the rule
    /// conditions.
    ///
    /// # Panics
    ///
    /// Panics if the rules import a module that is not in `BUILTIN_MODULES`.
    fn scan_impl<'a, 'opts>(
        &'a mut self,
        data: ScannedData<'a>,
        options: Option<ScanOptions<'opts>>,
    ) -> Result<ScanResults<'a, 'r>, ScanError> {
        let ctx = self.scan_context_mut();

        // Clear information about matches found in a previous scan, if any.
        ctx.reset();

        // Set the global variable `filesize` to the size of the scanned data.
        ctx.set_filesize(data.as_ref().len() as i64);

        // Indicate that the scanner is currently scanning the given data.
        ctx.scan_state = ScanState::ScanningData(data);

        for module_name in ctx.compiled_rules.imports() {
            // Lookup the module in the list of built-in modules.
            let module = modules::BUILTIN_MODULES
                .get(module_name)
                .unwrap_or_else(|| panic!("module `{module_name}` not found"));

            let root_struct_name = module.root_struct_descriptor.full_name();

            let module_output;
            // If the user already provided some output for the module by
            // calling `Scanner::set_module_output`, use that output. If not,
            // call the module's main function (if the module has a main
            // function) for getting its output.
            if let Some(output) =
                ctx.user_provided_module_outputs.remove(root_struct_name)
            {
                module_output = Some(output);
            } else {
                // Metadata for this module, if the caller passed any in the
                // scan options.
                let meta: Option<&'opts [u8]> =
                    options.as_ref().and_then(|options| {
                        options.module_metadata.get(module_name).copied()
                    });

                if let Some(main_fn) = module.main_fn {
                    module_output = Some(
                        main_fn(ctx.scanned_data().unwrap(), meta).map_err(
                            |err| ScanError::ModuleError {
                                module: module_name.to_string(),
                                err,
                            },
                        )?,
                    );
                } else {
                    module_output = None;
                }
            }

            if let Some(module_output) = &module_output {
                // Make sure that the module is returning a protobuf message of
                // the expected type.
                debug_assert_eq!(
                    module_output.descriptor_dyn().full_name(),
                    module.root_struct_descriptor.full_name(),
                    "main function of module `{}` must return `{}`, but returned `{}`",
                    module_name,
                    module.root_struct_descriptor.full_name(),
                    module_output.descriptor_dyn().full_name(),
                );

                // Make sure that the module is returning a protobuf message
                // where all required fields are initialized. This only applies
                // to proto2, proto3 doesn't have "required" fields, all fields
                // are optional.
                debug_assert!(
                    module_output.is_initialized_dyn(),
                    "module `{}` returned a protobuf `{}` where some required fields are not initialized ",
                    module_name,
                    module.root_struct_descriptor.full_name()
                );
            }

            // When constant folding is enabled we don't need to generate
            // structure fields for enums. This is because during the
            // optimization process symbols like MyEnum.ENUM_ITEM are resolved
            // to their constant values at compile time. In other words, the
            // compiler determines that MyEnum.ENUM_ITEM is equal to some value
            // X, and uses that value in the generated code.
            //
            // However, without constant folding, enums are treated as any
            // other field in a struct, and their values are determined at scan
            // time. For that reason these fields must be generated for enums
            // when constant folding is disabled.
            let generate_fields_for_enums =
                !cfg!(feature = "constant-folding");

            let module_struct = Struct::from_proto_descriptor_and_msg(
                &module.root_struct_descriptor,
                module_output.as_deref(),
                generate_fields_for_enums,
            );

            // Keep the module's output in the context, `ScanResults` reads
            // it from `module_outputs`.
            if let Some(module_output) = module_output {
                ctx.module_outputs
                    .insert(root_struct_name.to_string(), module_output);
            }

            // The data structure obtained from the module is added to the
            // root structure. Any data from previous scans will be replaced
            // with the new data structure.
            ctx.root_struct
                .add_field(module_name, TypeValue::Struct(module_struct));
        }

        // The user provided module outputs are not needed anymore. Let's
        // clear any remaining entry in the hash map (which can happen if
        // the user has set outputs for modules that are not even imported
        // by the rules).
        ctx.user_provided_module_outputs.clear();

        // Clear the flag that indicates that the search phase was done.
        ctx.set_pattern_search_done(false);

        // Evaluate the conditions of every rule, this will call
        // `ScanContext::search_for_patterns` if necessary.
        ctx.eval_conditions()?;

        // Take the scanned data back from the scan state. The state was set
        // to `ScanningData` at the start of this function.
        let data = match ctx.scan_state.take() {
            ScanState::ScanningData(data) => data,
            _ => unreachable!(),
        };

        // The data is stored in the `Finished` state as a single-block
        // snippet source.
        ctx.scan_state = ScanState::Finished(DataSnippets::SingleBlock(data));

        Ok(ScanResults::new(ctx))
    }
620}
621
/// Helper type that exposes the data matched during a scan operation.
///
/// Matching data can be accessed through the [`Match::data`] method. Normally,
/// this data can be retrieved by slicing directly into the scanned input.
/// However, that requires the original input to remain valid until the scan
/// results are processed. This works fine for a single contiguous block of
/// memory, but is impractical when scanning multiple blocks, since holding
/// onto all of them until the end would consume excessive memory.
///
/// To handle this, two strategies are used:
///
/// - **Single-block scans**: Data is accessed directly from the input slice.
/// - **Multi-block scans**: Matching fragments are copied and retained in a
///   BTreeMap until the results are processed. The keys in the btree are
///   the offsets where the snippets start and the values are vectors with
///   the snippet's data.
///
/// Each strategy corresponds to a variant in this enum.
pub(crate) enum DataSnippets<'d> {
    /// The whole scanned input; ranges are resolved by slicing into it.
    SingleBlock(ScannedData<'d>),
    /// Copies of the matching fragments, keyed by the offset where each
    /// fragment starts.
    MultiBlock(BTreeMap<usize, Vec<u8>>),
}
644
645impl DataSnippets<'_> {
646    pub(crate) fn get(&self, range: Range<usize>) -> Option<&[u8]> {
647        match self {
648            Self::SingleBlock(data) => data.as_ref().get(range),
649            Self::MultiBlock(btree) => {
650                // Find in the btree the snippet that starts exactly at the
651                // offset indicated by range.start, if not found, take the
652                // previous one, which may also contain the requested range.
653                let (snippet_offset, snippet_data) =
654                    btree.range(..=range.start).next_back()?;
655
656                // Calculate the start and end of the slice within the snippet.
657                let start = range.start - snippet_offset;
658                let end = range.end - snippet_offset;
659
660                // Returns the data, or `None` if `start` and `end` are not
661                // within the snippet boundaries.
662                snippet_data.get(start..end)
663            }
664        }
665    }
666}
667
/// Results of a scan operation.
///
/// Allows iterating over both the matching and non-matching rules, and
/// retrieving the outputs produced by YARA modules.
pub struct ScanResults<'a, 'r> {
    // Scan context that all the results are read from.
    ctx: &'a ScanContext<'r, 'a>,
}
674
675impl Debug for ScanResults<'_, '_> {
676    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
677        f.write_str("ScanResults")
678    }
679}
680
681impl<'a, 'r> ScanResults<'a, 'r> {
682    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
683        Self { ctx }
684    }
685
686    /// Returns an iterator that yields the matching rules in arbitrary order.
687    pub fn matching_rules(&self) -> MatchingRules<'_, 'r> {
688        MatchingRules::new(self.ctx)
689    }
690
691    /// Returns an iterator that yields the non-matching rules in arbitrary
692    /// order.
693    pub fn non_matching_rules(&self) -> NonMatchingRules<'_, 'r> {
694        NonMatchingRules::new(self.ctx)
695    }
696
697    /// Returns the protobuf produced by a YARA module after processing the
698    /// data.
699    ///
700    /// The result will be `None` if the module doesn't exist or didn't
701    /// produce any output.
702    pub fn module_output(
703        &self,
704        module_name: &str,
705    ) -> Option<&'a dyn MessageDyn> {
706        let module = BUILTIN_MODULES.get(module_name)?;
707        let module_output = self
708            .ctx
709            .module_outputs
710            .get(module.root_struct_descriptor.full_name())?
711            .as_ref();
712        Some(module_output)
713    }
714
715    /// Returns an iterator that yields tuples composed of a YARA module name
716    /// and the protobuf produced by that module.
717    ///
718    /// Only returns the modules that produced some output.
719    pub fn module_outputs(&self) -> ModuleOutputs<'a, 'r> {
720        ModuleOutputs::new(self.ctx)
721    }
722}
723
/// Iterator that yields the rules that matched during a scan.
///
/// Private rules are not included by default, use
/// [`MatchingRules::include_private`] for changing this behaviour.
pub struct MatchingRules<'a, 'r> {
    // Scan context the rules are read from.
    ctx: &'a ScanContext<'r, 'a>,
    // Iterator over the IDs of the matching rules.
    iterator: Iter<'a, RuleId>,
    // Number of non-private rules that `iterator` has not visited yet.
    len_non_private: usize,
    // Number of private rules that `iterator` has not visited yet.
    len_private: usize,
    // Whether private rules are yielded.
    include_private: bool,
}
735
736impl<'a, 'r> MatchingRules<'a, 'r> {
737    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
738        Self {
739            ctx,
740            iterator: ctx.matching_rules.iter(),
741            include_private: false,
742            len_non_private: ctx.matching_rules.len()
743                - ctx.num_matching_private_rules,
744            len_private: ctx.num_matching_private_rules,
745        }
746    }
747
748    /// Specifies whether the iterator should yield private rules.
749    ///
750    /// This does not reset the iterator to its initial state, the iterator will
751    /// continue from its current position.
752    pub fn include_private(mut self, yes: bool) -> Self {
753        self.include_private = yes;
754        self
755    }
756}
757
758impl<'a, 'r> Iterator for MatchingRules<'a, 'r> {
759    type Item = Rule<'a, 'r>;
760
761    fn next(&mut self) -> Option<Self::Item> {
762        let rules = self.ctx.compiled_rules;
763        loop {
764            let rule_id = *self.iterator.next()?;
765            let rule_info = rules.get(rule_id);
766            if rule_info.is_private {
767                self.len_private -= 1;
768            } else {
769                self.len_non_private -= 1;
770            }
771            if self.include_private || !rule_info.is_private {
772                return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
773            }
774        }
775    }
776}
777
778impl ExactSizeIterator for MatchingRules<'_, '_> {
779    #[inline]
780    fn len(&self) -> usize {
781        if self.include_private {
782            self.len_non_private + self.len_private
783        } else {
784            self.len_non_private
785        }
786    }
787}
788
/// Iterator that yields the rules that didn't match during a scan.
///
/// Private rules are not included by default, use
/// [`NonMatchingRules::include_private`] for changing this behaviour.
pub struct NonMatchingRules<'a, 'r> {
    // Scan context the rules are read from.
    ctx: &'a ScanContext<'r, 'a>,
    // Iterator over the positions of the zero bits in the bitmap of
    // matching rules; each position is converted into a `RuleId`.
    iterator: bitvec::slice::IterZeros<'a, u8, Lsb0>,
    // Whether private rules are yielded.
    include_private: bool,
    // Number of private rules that `iterator` has not visited yet.
    len_private: usize,
    // Number of non-private rules that `iterator` has not visited yet.
    len_non_private: usize,
}
800
801impl<'a, 'r> NonMatchingRules<'a, 'r> {
802    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
803        let num_rules = ctx.compiled_rules.num_rules();
804        let main_memory = ctx
805            .wasm_main_memory
806            .unwrap()
807            .data(unsafe { ctx.wasm_store.as_ref() });
808
809        let base = MATCHING_RULES_BITMAP_BASE as usize;
810
811        // Create a BitSlice that covers the region of main memory containing
812        // the bitmap that tells which rules matched and which did not.
813        let matching_rules_bitmap = BitSlice::<_, Lsb0>::from_slice(
814            &main_memory[base..base + num_rules / 8 + 1],
815        );
816
817        // The BitSlice will cover more bits than necessary, for example, if
818        // there are 3 rules the BitSlice will have 8 bits because it is
819        // created from a u8 slice that has 1 byte. Here we make sure that
820        // the BitSlice has exactly as many bits as existing rules.
821        let matching_rules_bitmap = &matching_rules_bitmap[0..num_rules];
822
823        Self {
824            ctx,
825            iterator: matching_rules_bitmap.iter_zeros(),
826            include_private: false,
827            len_non_private: ctx.compiled_rules.num_rules()
828                - ctx.matching_rules.len()
829                - ctx.num_non_matching_private_rules,
830            len_private: ctx.num_non_matching_private_rules,
831        }
832    }
833
834    /// Specifies whether the iterator should yield private rules.
835    ///
836    /// This does not reset the iterator to its initial state, the iterator will
837    /// continue from its current position.
838    pub fn include_private(mut self, yes: bool) -> Self {
839        self.include_private = yes;
840        self
841    }
842}
843
844impl<'a, 'r> Iterator for NonMatchingRules<'a, 'r> {
845    type Item = Rule<'a, 'r>;
846
847    fn next(&mut self) -> Option<Self::Item> {
848        let rules = self.ctx.compiled_rules;
849
850        loop {
851            let rule_id = RuleId::from(self.iterator.next()?);
852            let rule_info = rules.get(rule_id);
853
854            if rule_info.is_private {
855                self.len_private -= 1;
856            } else {
857                self.len_non_private -= 1;
858            }
859
860            if self.include_private || !rule_info.is_private {
861                return Some(Rule { ctx: Some(self.ctx), rule_info, rules });
862            }
863        }
864    }
865}
866
867impl ExactSizeIterator for NonMatchingRules<'_, '_> {
868    #[inline]
869    fn len(&self) -> usize {
870        if self.include_private {
871            self.len_non_private + self.len_private
872        } else {
873            self.len_non_private
874        }
875    }
876}
877
/// Iterator that returns the outputs produced by YARA modules.
///
/// Each item is a tuple with the module's name and the protobuf message that
/// the module produced.
pub struct ModuleOutputs<'a, 'r> {
    // Scan context that holds the module outputs.
    ctx: &'a ScanContext<'r, 'a>,
    // Value reported by `ExactSizeIterator::len`.
    len: usize,
    // Iterator over all the built-in modules, including those that did not
    // produce any output.
    iterator: hash_map::Iter<'a, &'a str, Module>,
}
884
885impl<'a, 'r> ModuleOutputs<'a, 'r> {
886    fn new(ctx: &'a ScanContext<'r, 'a>) -> Self {
887        Self {
888            ctx,
889            len: ctx.module_outputs.len(),
890            iterator: BUILTIN_MODULES.iter(),
891        }
892    }
893}
894
impl ExactSizeIterator for ModuleOutputs<'_, '_> {
    /// Reports the count tracked in the `len` field.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }
}
901
902impl<'a> Iterator for ModuleOutputs<'a, '_> {
903    type Item = (&'a str, &'a dyn MessageDyn);
904
905    fn next(&mut self) -> Option<Self::Item> {
906        loop {
907            let (name, module) = self.iterator.next()?;
908            if let Some(module_output) = self
909                .ctx
910                .module_outputs
911                .get(module.root_struct_descriptor.full_name())
912            {
913                return Some((*name, module_output.as_ref()));
914            }
915        }
916    }
917}
918
#[cfg(test)]
mod snippet_tests {
    use super::DataSnippets;
    use std::collections::BTreeMap;
    use std::ops::Range;

    /// Checks that ranges are resolved against the right snippet in the
    /// multi-block case, and that ranges exceeding a snippet yield `None`.
    #[test]
    fn snippets() {
        // Two snippets: one at offset 0 with 9 bytes, and another one at
        // offset 50 with 4 bytes.
        let snippets = DataSnippets::MultiBlock(BTreeMap::from([
            (0, vec![1, 2, 3, 4, 5, 6, 7, 8, 9]),
            (50, vec![51, 52, 53, 54]),
        ]));

        let check = |range: Range<usize>, expected: Option<&[u8]>| {
            assert_eq!(snippets.get(range), expected);
        };

        // Ranges within the first snippet.
        check(0..2, Some(&[1, 2]));
        check(1..3, Some(&[2, 3]));
        check(8..9, Some(&[9]));
        // Range that goes past the end of the first snippet.
        check(9..10, None);
        // Ranges within the second snippet.
        check(50..51, Some(&[51]));
        check(50..54, Some(&[51, 52, 53, 54]));
        check(52..54, Some(&[53, 54]));
        // Range that goes past the end of the second snippet.
        check(50..56, None);
    }
}