Skip to main content

yara_x/modules/
mod.rs

1use std::sync::LazyLock;
2
3use protobuf::MessageDyn;
4use protobuf::reflect::MessageDescriptor;
5use rustc_hash::FxHashMap;
6
7use thiserror::Error;
8
/// Rust code for the protobuf definitions used by YARA modules.
///
/// Exactly one of the two `include!` statements below is compiled in:
///
/// * with the `generate-proto-code` feature enabled, the code is read from
///   `protos/mod.rs` inside Cargo's `OUT_DIR`;
/// * otherwise the `protos/generated/mod.rs` file, located relative to this
///   source file, is used.
pub mod protos {
    #[cfg(feature = "generate-proto-code")]
    include!(concat!(env!("OUT_DIR"), "/protos/mod.rs"));

    #[cfg(not(feature = "generate-proto-code"))]
    include!("protos/generated/mod.rs");
}
16
17#[cfg(test)]
18mod tests;
19
20pub(crate) mod field_docs;
21
/// Crate-internal prelude that re-exports, under a single path, the items
/// listed below (scanner context, WASM runtime helpers, string helpers and
/// the module-related procedural macros).
// NOTE(review): the `allow` presumably exists because not every re-export
// is used under every feature combination — confirm before removing it.
#[allow(unused_imports)]
pub(crate) mod prelude {
    pub(crate) use crate::scanner::ScanContext;
    pub(crate) use crate::wasm::runtime::Caller;
    pub(crate) use crate::wasm::string::FixedLenString;
    pub(crate) use crate::wasm::string::RuntimeString;
    // Imported anonymously (`as _`), so the name `String` itself is not
    // brought into scope. NOTE(review): presumably a trait whose methods
    // must be callable without shadowing `std::string::String` — confirm.
    pub(crate) use crate::wasm::string::String as _;
    pub(crate) use crate::wasm::string::{Lowercase, Uppercase};
    pub(crate) use crate::wasm::*;
    pub(crate) use bstr::ByteSlice;
    // `distributed_slice` is only re-exported when the `inventory` feature
    // is disabled.
    #[cfg(not(feature = "inventory"))]
    pub(crate) use linkme::distributed_slice;
    pub(crate) use yara_x_macros::{module_export, module_main, wasm_export};
}
// Textually includes `modules.rs` (resolved relative to this file) at this
// point. NOTE(review): presumably an auto-generated file declaring the Rust
// modules that implement each YARA module — confirm against `build.rs`.
include!("modules.rs");
37
/// Errors that can be returned by a YARA module.
///
/// The enum is `#[non_exhaustive]`, so code matching on it must include a
/// wildcard arm; new variants may be added in the future.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ModuleError {
    /// The metadata passed to the module has an invalid format.
    #[error("invalid metadata: {err}")]
    MetadataError {
        /// Description of the error that actually occurred.
        err: String,
    },
    /// An error occurred while processing the input data.
    #[error("internal error: {err}")]
    InternalError {
        /// Description of the error that actually occurred.
        err: String,
    },
}
55
/// Signature of a module's main function.
///
/// The first argument is the data handed to the module and the second one
/// is the optional metadata. On success the function returns the protobuf
/// message produced by the module as a boxed [`MessageDyn`]; on failure it
/// returns a [`ModuleError`].
type MainFn =
    fn(&[u8], Option<&[u8]>) -> Result<Box<dyn MessageDyn>, ModuleError>;
59
/// A structure describing a YARA module.
///
/// Instances of this structure are the values stored in `BUILTIN_MODULES`.
pub(crate) struct Module {
    /// Pointer to the module's main function, or `None` if the module
    /// doesn't have one.
    pub main_fn: Option<MainFn>,
    /// Name of the Rust module, if any, that contains code for this YARA
    /// module (e.g: "test_proto2").
    pub rust_module_name: Option<&'static str>,
    /// A [`MessageDescriptor`] that describes the module's structure. This
    /// corresponds to the protobuf message declared in the "root_message"
    /// for the YARA module. It allows iterating the fields declared by the
    /// module and obtaining their names and types.
    pub root_struct_descriptor: MessageDescriptor,
}
73
/// Macro that adds a module to the `BUILTIN_MODULES` map.
///
/// This macro is used by `add_modules.rs`, a file that is automatically
/// generated by `build.rs` based on the Protocol Buffers defined in the
/// `src/modules/protos` directory.
///
/// # Arguments
///
/// * `$modules`: the map where the module is inserted.
/// * `$name`: the module's name, used as the key in the map.
/// * `$proto`: identifier of the module inside `protos` that exposes the
///   `file_descriptor()` function for the module's `.proto` file.
/// * `$root_message`: full name (without the leading dot) of the protobuf
///   message that describes the module's structure.
/// * `$rust_module_name`: an `Option<&'static str>` with the name of the
///   Rust module that implements the YARA module.
/// * `$main_fn`: an `Option<MainFn>` with the module's main function.
///
/// # Panics
///
/// Panics if the file descriptor for `$proto` doesn't define a message
/// named `$root_message`.
///
/// # Example
///
/// ```text
/// add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn));
/// ```
macro_rules! add_module {
    ($modules:expr, $name:literal, $proto:ident, $root_message:literal, $rust_module_name:expr, $main_fn:expr) => {{
        let root_struct_descriptor = protos::$proto::file_descriptor()
            // message_by_full_name expects a dot (.) at the beginning
            // of the name.
            .message_by_full_name(format!(".{}", $root_message).as_str())
            // The panic message is built lazily, only when the lookup
            // fails, instead of being formatted on every invocation.
            .unwrap_or_else(|| panic!(
                "`root_message` option in protobuf `{}` is wrong, message `{}` is not defined",
                stringify!($proto),
                $root_message
            ));

        $modules.insert(
            $name,
            Module {
                main_fn: $main_fn,
                rust_module_name: $rust_module_name,
                root_struct_descriptor,
            },
        );
    }};
}
107
/// `BUILTIN_MODULES` is a static, global map where keys are module names
/// and values are [`Module`] structures that describe a YARA module.
///
/// The map is built lazily, the first time it is accessed.
///
/// This table is populated with the modules defined by a `.proto` file in
/// `src/modules/protos`. Each `.proto` file that contains a statement like
/// the following one defines a YARA module:
///
/// ```protobuf
/// option (yara.module_options) = {
///   name : "foo"
///   root_message: "Foo"
///   rust_module: "foo"
/// };
/// ```
///
/// The `name` field is the module's name (i.e: the name used in `import`
/// statements), which is also the key in `BUILTIN_MODULES`. `root_message`
/// is the name of the message that describes the module's structure. This
/// is required because a `.proto` file can define more than one message.
///
/// `rust_module` is the name of the Rust module where functions exported
/// by the YARA module are defined. This field is optional; if not provided,
/// the module is considered a data-only module.
pub(crate) static BUILTIN_MODULES: LazyLock<FxHashMap<&'static str, Module>> =
    LazyLock::new(|| {
        let mut modules = FxHashMap::default();
        // The `add_modules.rs` file is automatically generated at compile time
        // by `build.rs`. This is an example of what `add_modules.rs` looks
        // like:
        //
        // {
        //  #[cfg(feature = "pe_module")]
        //  add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn));
        //
        //  #[cfg(feature = "elf_module")]
        //  add_module!(modules, "elf", elf, "elf.ELF", Some("elf"), Some(elf::__main__ as MainFn));
        // }
        //
        // `add_modules.rs` will contain an `add_module!` statement for each
        // protobuf in `src/modules/protos` defining a YARA module. Those
        // statements refer to the local variable `modules` declared above.
        include!("add_modules.rs");
        modules
    });
150
151pub mod mods {
152    /*! Utility functions and structures that allow invoking YARA modules directly.
153
154    The utility functions [`invoke`], [`invoke_dyn`] and [`invoke_all`]
155    allow leveraging YARA modules for parsing some file formats independently
156    of any YARA rule. With these functions you can pass arbitrary data to a
157    YARA module and obtain the same data structure that is accessible to YARA
158    rules and which you use in your rule conditions.
159
160    This allows external projects to benefit from YARA's file-parsing
161    capabilities for their own purposes.
162
163    # Example
164
165    ```rust
166    # use yara_x;
167    let pe_info = yara_x::mods::invoke::<yara_x::mods::PE>(&[]);
168    ```
169    */
170
171    /// Data structures defined by the `crx` module.
172    ///
173    /// The main structure produced by the module is [`crx::Crx`]. The rest
174    /// of them are used by one or more fields in the main structure.
175    ///
176    pub use super::protos::crx;
177    /// Data structure returned by the `crx` module.
178    pub use super::protos::crx::Crx;
179    /// Data structures defined by the `dex` module.
180    ///
181    /// The main structure produced by the module is [`dex::Dex`]. The rest
182    /// of them are used by one or more fields in the main structure.
183    ///
184    pub use super::protos::dex;
185    /// Data structure returned by the `dex` module.
186    pub use super::protos::dex::Dex;
187
188    /// Data structures defined by the `dotnet` module.
189    ///
190    /// The main structure produced by the module is [`dotnet::Dotnet`]. The
191    /// rest of them are used by one or more fields in the main structure.
192    ///
193    pub use super::protos::dotnet;
194    /// Data structure returned by the `dotnet` module.
195    pub use super::protos::dotnet::Dotnet;
196
197    /// Data structures defined by the `elf` module.
198    ///
199    /// The main structure produced by the module is [`elf::ELF`]. The rest of
200    /// them are used by one or more fields in the main structure.
201    ///
202    pub use super::protos::elf;
203    /// Data structure returned by the `elf` module.
204    pub use super::protos::elf::ELF;
205
206    /// Data structures defined by the `lnk` module.
207    ///
208    /// The main structure produced by the module is [`lnk::Lnk`]. The rest of
209    /// them are used by one or more fields in the main structure.
210    ///
211    pub use super::protos::lnk;
212    /// Data structure returned by the `lnk` module.
213    pub use super::protos::lnk::Lnk;
214
215    /// Data structures defined by the `macho` module.
216    ///
217    /// The main structure produced by the module is [`macho::Macho`]. The rest
218    /// of them are used by one or more fields in the main structure.
219    ///
220    pub use super::protos::macho;
221    /// Data structure returned by the `macho` module.
222    pub use super::protos::macho::Macho;
223
224    /// Data structures defined by the `pe` module.
225    ///
226    /// The main structure produced by the module is [`pe::PE`]. The rest
227    /// of them are used by one or more fields in the main structure.
228    ///
229    pub use super::protos::pe;
230    /// Data structure returned by the `pe` module.
231    pub use super::protos::pe::PE;
232
233    /// A data structure containing the data returned by all modules.
234    pub use super::protos::mods::Modules;
235
236    /// Invokes a YARA module with arbitrary data.
237    ///
238    /// <br>
239    ///
240    /// YARA modules typically parse specific file formats, returning structures
241    /// that contain information about the file. These structures are used in YARA
242    /// rules for expressing powerful and rich conditions. However, being able to
243    /// access this information outside YARA rules can also be beneficial.
244    ///
245    /// <br>
246    ///
247    /// This function allows the direct invocation of a YARA module for parsing
248    /// arbitrary data. It returns the structure produced by the module, which
249    /// depends upon the invoked module. The result will be [`None`] if the
250    /// module does not exist, or if it doesn't produce any information for
251    /// the input data.
252    ///
253    /// `T` must be one of the structure types returned by a YARA module, which
254    /// are defined in [`crate::mods`], like [`crate::mods::PE`], [`crate::mods::ELF`], etc.
255    ///
256    /// # Example
257    /// ```rust
258    /// # use yara_x;
259    /// let elf_info = yara_x::mods::invoke::<yara_x::mods::ELF>(&[]);
260    /// ```
261    pub fn invoke<T: protobuf::MessageFull>(data: &[u8]) -> Option<Box<T>> {
262        let module_output = invoke_dyn::<T>(data)?;
263        Some(<dyn protobuf::MessageDyn>::downcast_box(module_output).unwrap())
264    }
265
266    /// Like [`invoke`], but allows passing metadata to the module.
267    pub fn invoke_with_meta<T: protobuf::MessageFull>(
268        data: &[u8],
269        meta: Option<&[u8]>,
270    ) -> Option<Box<T>> {
271        let module_output = invoke_with_meta_dyn::<T>(data, meta)?;
272        Some(<dyn protobuf::MessageDyn>::downcast_box(module_output).unwrap())
273    }
274
    /// Invokes a YARA module with arbitrary data, returning a dynamic
    /// structure.
    ///
    /// This function is similar to [`invoke`] but its result is a dynamic-
    /// dispatch version of the structure returned by the YARA module.
    ///
    /// It is equivalent to calling [`invoke_with_meta_dyn`] without
    /// metadata.
    pub fn invoke_dyn<T: protobuf::MessageFull>(
        data: &[u8],
    ) -> Option<Box<dyn protobuf::MessageDyn>> {
        invoke_with_meta_dyn::<T>(data, None)
    }
285
286    /// Like [`invoke_dyn`], but allows passing metadata to the module.
287    pub fn invoke_with_meta_dyn<T: protobuf::MessageFull>(
288        data: &[u8],
289        meta: Option<&[u8]>,
290    ) -> Option<Box<dyn protobuf::MessageDyn>> {
291        let descriptor = T::descriptor();
292        let proto_name = descriptor.full_name();
293        let (_, module) =
294            super::BUILTIN_MODULES.iter().find(|(_, module)| {
295                module.root_struct_descriptor.full_name() == proto_name
296            })?;
297
298        module.main_fn?(data, meta).ok()
299    }
300
301    /// Invokes all YARA modules and returns the data produced by them.
302    ///
303    /// This function is similar to [`invoke`], but it returns the
304    /// information produced by all modules at once.
305    ///
306    /// # Example
307    /// ```rust
308    /// # use yara_x;
309    /// let modules_output = yara_x::mods::invoke_all(&[]);
310    /// ```
311    pub fn invoke_all(data: &[u8]) -> Box<Modules> {
312        let mut info = Box::new(Modules::new());
313        info.pe = protobuf::MessageField(invoke::<PE>(data));
314        info.elf = protobuf::MessageField(invoke::<ELF>(data));
315        info.dotnet = protobuf::MessageField(invoke::<Dotnet>(data));
316        info.macho = protobuf::MessageField(invoke::<Macho>(data));
317        info.lnk = protobuf::MessageField(invoke::<Lnk>(data));
318        info.crx = protobuf::MessageField(invoke::<Crx>(data));
319        info.dex = protobuf::MessageField(invoke::<Dex>(data));
320        info
321    }
322
323    /// Iterator over built-in module names.
324    ///
325    /// See the "debug modules" command.
326    pub fn module_names() -> impl Iterator<Item = &'static str> {
327        use itertools::Itertools;
328        super::BUILTIN_MODULES.keys().sorted_by_key(|k| **k).copied()
329    }
330
331    /// Returns the definition of the module with the given name.
332    pub fn module_definition(name: &str) -> Option<reflect::Struct> {
333        use crate::types;
334        use std::rc::Rc;
335        super::BUILTIN_MODULES
336            .get(name)
337            .map(|m| reflect::Struct::new(Rc::<types::Struct>::from(m)))
338    }
339
340    /// Types that allow for module introspection.
341    ///
342    /// This API is unstable and not ready for public use.
343    #[doc(hidden)]
344    pub mod reflect {
345        use std::borrow::Cow;
346        use std::rc::Rc;
347
348        use crate::types;
349        use crate::types::{Map, TypeValue};
350
        /// Describes a structure or module.
        ///
        /// This is a thin wrapper around a reference-counted
        /// `types::Struct`, so cloning it doesn't copy the underlying
        /// structure.
        #[derive(Clone, Debug, PartialEq)]
        pub struct Struct {
            /// The wrapped structure definition.
            inner: Rc<types::Struct>,
        }
356
357        impl Struct {
358            pub(super) fn new(inner: Rc<types::Struct>) -> Self {
359                Self { inner }
360            }
361
362            /// Returns an iterator over the fields defined in the structure.
363            ///
364            /// The fields are sorted by name.
365            pub fn fields(&self) -> impl Iterator<Item = Field<'_>> + '_ {
366                self.inner
367                    .fields()
368                    .map(|(name, field)| Field::new(name, field))
369            }
370        }
371
        /// Describes a function.
        #[derive(Clone, Debug, PartialEq)]
        pub struct Func {
            /// All the existing signatures for this function. A function
            /// can have multiple signatures that differ in their arguments
            /// or return type.
            pub signatures: Vec<FuncSignature>,
        }
380
381        impl From<Rc<types::Func>> for Func {
382            fn from(func: Rc<types::Func>) -> Self {
383                let mut signatures =
384                    Vec::with_capacity(func.signatures().len());
385
386                for signature in func.signatures() {
387                    signatures.push(FuncSignature {
388                        args: signature
389                            .args
390                            .iter()
391                            .map(|(name, ty)| (name.clone(), Type::from(ty)))
392                            .collect(),
393                        ret: Type::from(&signature.result),
394                        doc: signature.doc.clone(),
395                    });
396                }
397
398                Func { signatures }
399            }
400        }
401
        /// Describes a function signature.
        ///
        /// The fields are private; use [`FuncSignature::args`],
        /// [`FuncSignature::ret_type`] and [`FuncSignature::doc`] for
        /// accessing them.
        #[derive(Clone, Debug, PartialEq)]
        pub struct FuncSignature {
            /// The names and types of the function arguments.
            args: Vec<(String, Type)>,
            /// The return type for the function.
            ret: Type,
            /// Function's documentation, if any.
            doc: Option<Cow<'static, str>>,
        }
412
413        impl FuncSignature {
414            /// The names and types of the function arguments.
415            pub fn args(
416                &self,
417            ) -> impl ExactSizeIterator<Item = (&str, &Type)> {
418                self.args.iter().map(|(name, ty)| (name.as_str(), ty))
419            }
420
421            /// The return type for the function.
422            pub fn ret_type(&self) -> &Type {
423                &self.ret
424            }
425
426            /// Function's documentation.
427            pub fn doc(&self) -> Option<&str> {
428                self.doc.as_deref()
429            }
430        }
431
        /// Describes a field within a structure or module.
        ///
        /// A `Field` only borrows the field's name and definition, so it
        /// can't outlive the data it was obtained from.
        #[derive(Clone)]
        pub struct Field<'a> {
            /// The field's name.
            name: &'a str,
            /// The field's definition, which provides its type and
            /// documentation.
            struct_field: &'a types::StructField,
        }
438
439        impl<'a> Field<'a> {
440            fn new(
441                name: &'a str,
442                struct_field: &'a types::StructField,
443            ) -> Self {
444                Self { name, struct_field }
445            }
446
447            /// Returns the name of the field.
448            pub fn name(&self) -> &'a str {
449                self.name
450            }
451
452            /// Returns the type of the field.
453            pub fn ty(&self) -> Type {
454                Type::from(&self.struct_field.type_value)
455            }
456
457            /// Returns the documentation for the current field.
458            pub fn doc(&self) -> Option<&str> {
459                self.struct_field.doc.as_deref()
460            }
461        }
462
        /// The type of a field, function argument or return value.
        #[derive(Clone, Debug, PartialEq)]
        pub enum Type {
            /// An integer.
            Integer,
            /// A float.
            Float,
            /// A boolean.
            Bool,
            /// A string.
            String,
            /// A regular expression.
            Regexp,
            /// A structure.
            Struct(Struct),
            /// An array. The boxed type is the type of the array's items.
            Array(Box<Type>),
            /// A map. The first boxed type is the type of the keys, the
            /// second one is the type of the values.
            Map(Box<Type>, Box<Type>),
            /// A function.
            Func(Func),
        }
485
486        impl From<&TypeValue> for Type {
487            fn from(type_value: &TypeValue) -> Self {
488                match type_value {
489                    TypeValue::Bool { .. } => Type::Bool,
490                    TypeValue::Float { .. } => Type::Float,
491                    TypeValue::Integer { .. } => Type::Integer,
492                    TypeValue::String { .. } => Type::String,
493                    TypeValue::Regexp(_) => Type::Regexp,
494                    TypeValue::Struct(s) => {
495                        Type::Struct(Struct::new(s.clone()))
496                    }
497                    TypeValue::Array(a) => {
498                        Type::Array(Box::new(Type::from(&a.deputy())))
499                    }
500                    TypeValue::Map(m) => {
501                        let key_kind = match **m {
502                            Map::IntegerKeys { .. } => Type::Integer,
503                            Map::StringKeys { .. } => Type::String,
504                        };
505                        Type::Map(
506                            Box::new(key_kind),
507                            Box::new(Type::from(&m.deputy())),
508                        )
509                    }
510                    TypeValue::Func(func) => Type::Func(func.clone().into()),
511                    TypeValue::Unknown => unreachable!(),
512                }
513            }
514        }
515    }
516}
517
518#[cfg(feature = "crypto")]
519pub(crate) mod utils;