yara_x/modules/mod.rs
1use std::sync::LazyLock;
2
3use protobuf::MessageDyn;
4use protobuf::reflect::MessageDescriptor;
5use rustc_hash::FxHashMap;
6
7use thiserror::Error;
8
/// Rust code for the protobuf definitions used by YARA modules.
///
/// The code is pulled in with `include!` from one of two locations:
///
/// * `$OUT_DIR/protos/mod.rs` when the `generate-proto-code` feature is
///   enabled.
/// * `protos/generated/mod.rs` otherwise (presumably a pre-generated copy
///   kept in the source tree -- confirm against the build script).
pub mod protos {
    #[cfg(feature = "generate-proto-code")]
    include!(concat!(env!("OUT_DIR"), "/protos/mod.rs"));

    #[cfg(not(feature = "generate-proto-code"))]
    include!("protos/generated/mod.rs");
}
16
17#[cfg(test)]
18mod tests;
19
20pub(crate) mod field_docs;
21
/// Crate-internal prelude that gathers, in a single place, a set of
/// re-exports from the scanner, the WASM runtime and a few external crates.
///
/// NOTE(review): presumably intended to be glob-imported by the code that
/// implements each YARA module -- confirm against the modules themselves.
// Not every user of the prelude needs every re-export, so unused imports
// are expected here.
#[allow(unused_imports)]
pub(crate) mod prelude {
    pub(crate) use crate::scanner::ScanContext;
    pub(crate) use crate::wasm::runtime::Caller;
    pub(crate) use crate::wasm::string::FixedLenString;
    pub(crate) use crate::wasm::string::RuntimeString;
    // Anonymous import: makes the trait's methods available without binding
    // the name `String`, which would shadow `std::string::String`.
    pub(crate) use crate::wasm::string::String as _;
    pub(crate) use crate::wasm::string::{Lowercase, Uppercase};
    pub(crate) use crate::wasm::*;
    pub(crate) use bstr::ByteSlice;
    // `distributed_slice` is only re-exported when the `inventory` feature
    // is disabled.
    #[cfg(not(feature = "inventory"))]
    pub(crate) use linkme::distributed_slice;
    pub(crate) use yara_x_macros::{module_export, module_main, wasm_export};
}
36include!("modules.rs");
37
/// Errors that can be returned by a module's main function.
///
/// The enum is marked `#[non_exhaustive]`, so code outside this crate that
/// matches on it must include a wildcard arm.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ModuleError {
    /// The metadata passed to the module has an invalid format.
    #[error("invalid metadata: {err}")]
    MetadataError {
        /// Description of the error that actually occurred.
        err: String,
    },
    /// An error occurred while processing the input data.
    #[error("internal error: {err}")]
    InternalError {
        /// Description of the error that actually occurred.
        err: String,
    },
}
55
/// Signature of a module's main function.
///
/// The first argument is the data to be processed by the module, the second
/// one is an optional blob of metadata. On success the function returns the
/// protobuf message produced by the module, otherwise a [`ModuleError`].
type MainFn =
    fn(&[u8], Option<&[u8]>) -> Result<Box<dyn MessageDyn>, ModuleError>;
59
/// A structure describing a YARA module.
///
/// Instances of this structure are the values stored in `BUILTIN_MODULES`.
pub(crate) struct Module {
    /// Pointer to the module's main function, or `None` if the module
    /// doesn't have one.
    pub main_fn: Option<MainFn>,
    /// Name of the Rust module, if any, that contains code for this YARA
    /// module (e.g: "test_proto2").
    pub rust_module_name: Option<&'static str>,
    /// A [`MessageDescriptor`] that describes the module's structure. This
    /// corresponds to the protobuf message declared in the "root_message"
    /// for the YARA module. It allows iterating the fields declared by the
    /// module and obtaining their names and types.
    pub root_struct_descriptor: MessageDescriptor,
}
73
/// Macro that adds a module to the `BUILTIN_MODULES` map.
///
/// This macro is used by `add_modules.rs`, a file that is automatically
/// generated by `build.rs` based on the Protocol Buffers defined in the
/// `src/modules/protos` directory.
///
/// # Arguments
///
/// * `$modules` - the map where the module is inserted.
/// * `$name` - the key under which the module is inserted.
/// * `$proto` - identifier of the module inside `protos` whose
///   `file_descriptor()` contains the root message.
/// * `$root_message` - full name of the root message, *without* the leading
///   dot (the macro adds it).
/// * `$rust_module_name` - value stored in `Module::rust_module_name`.
/// * `$main_fn` - value stored in `Module::main_fn`.
///
/// # Panics
///
/// Panics if `$root_message` is not defined in the file descriptor of
/// `$proto`.
///
/// # Example
///
/// ```text
/// add_module!(modules, "test", test, "Test", Some("test_mod"), Some(test::main as MainFn));
/// ```
macro_rules! add_module {
    ($modules:expr, $name:literal, $proto:ident, $root_message:literal, $rust_module_name:expr, $main_fn:expr) => {{
        let root_struct_descriptor = protos::$proto::file_descriptor()
            // message_by_full_name expects a dot (.) at the beginning
            // of the name. As `$root_message` is a literal, the dotted name
            // is built at compile time without any allocation.
            .message_by_full_name(concat!(".", $root_message))
            // The panic message is formatted lazily, only when the lookup
            // actually fails.
            .unwrap_or_else(|| {
                panic!(
                    "`root_message` option in protobuf `{}` is wrong, message `{}` is not defined",
                    stringify!($proto),
                    $root_message
                )
            });

        $modules.insert(
            $name,
            Module {
                main_fn: $main_fn,
                rust_module_name: $rust_module_name,
                root_struct_descriptor,
            },
        );
    }};
}
107
/// `BUILTIN_MODULES` is a static, global map where keys are module names
/// and values are [`Module`] structures that describe a YARA module.
///
/// The map is built lazily, the first time it is accessed.
///
/// This table is populated with the modules defined by a `.proto` file in
/// `src/modules/protos`. Each `.proto` file that contains a statement like
/// the following one defines a YARA module:
///
/// ```protobuf
/// option (yara.module_options) = {
///   name : "foo"
///   root_message: "Foo"
///   rust_module: "foo"
/// };
/// ```
///
/// The `name` field is the module's name (i.e: the name used in `import`
/// statements), which is also the key in `BUILTIN_MODULES`. `root_message`
/// is the name of the message that describes the module's structure. This
/// is required because a `.proto` file can define more than one message.
///
/// `rust_module` is the name of the Rust module where functions exported
/// by the YARA module are defined. This field is optional; if not provided
/// the module is considered a data-only module.
pub(crate) static BUILTIN_MODULES: LazyLock<FxHashMap<&'static str, Module>> =
    LazyLock::new(|| {
        let mut modules = FxHashMap::default();
        // The `add_modules.rs` file is automatically generated at compile time
        // by `build.rs`. This is an example of what `add_modules.rs` looks like:
        //
        // {
        //  #[cfg(feature = "pe_module")]
        //  add_module!(modules, "pe", pe, "pe.PE", Some("pe"), Some(pe::__main__ as MainFn));
        //
        //  #[cfg(feature = "elf_module")]
        //  add_module!(modules, "elf", elf, "elf.ELF", Some("elf"), Some(elf::__main__ as MainFn));
        // }
        //
        // `add_modules.rs` will contain an `add_module!` statement for each
        // protobuf in `src/modules/protos` defining a YARA module.
        include!("add_modules.rs");
        modules
    });
150
151pub mod mods {
152 /*! Utility functions and structures that allow invoking YARA modules directly.
153
154 The utility functions [`invoke`], [`invoke_dyn`] and [`invoke_all`]
155 allow leveraging YARA modules for parsing some file formats independently
156 of any YARA rule. With these functions you can pass arbitrary data to a
157 YARA module and obtain the same data structure that is accessible to YARA
158 rules and which you use in your rule conditions.
159
160 This allows external projects to benefit from YARA's file-parsing
161 capabilities for their own purposes.
162
163 # Example
164
165 ```rust
166 # use yara_x;
167 let pe_info = yara_x::mods::invoke::<yara_x::mods::PE>(&[]);
168 ```
169 */
170
171 /// Data structures defined by the `crx` module.
172 ///
173 /// The main structure produced by the module is [`crx::Crx`]. The rest
174 /// of them are used by one or more fields in the main structure.
175 ///
176 pub use super::protos::crx;
177 /// Data structure returned by the `crx` module.
178 pub use super::protos::crx::Crx;
179 /// Data structures defined by the `dex` module.
180 ///
181 /// The main structure produced by the module is [`dex::Dex`]. The rest
182 /// of them are used by one or more fields in the main structure.
183 ///
184 pub use super::protos::dex;
185 /// Data structure returned by the `dex` module.
186 pub use super::protos::dex::Dex;
187
188 /// Data structures defined by the `dotnet` module.
189 ///
190 /// The main structure produced by the module is [`dotnet::Dotnet`]. The
191 /// rest of them are used by one or more fields in the main structure.
192 ///
193 pub use super::protos::dotnet;
194 /// Data structure returned by the `dotnet` module.
195 pub use super::protos::dotnet::Dotnet;
196
197 /// Data structures defined by the `elf` module.
198 ///
199 /// The main structure produced by the module is [`elf::ELF`]. The rest of
200 /// them are used by one or more fields in the main structure.
201 ///
202 pub use super::protos::elf;
203 /// Data structure returned by the `elf` module.
204 pub use super::protos::elf::ELF;
205
206 /// Data structures defined by the `lnk` module.
207 ///
208 /// The main structure produced by the module is [`lnk::Lnk`]. The rest of
209 /// them are used by one or more fields in the main structure.
210 ///
211 pub use super::protos::lnk;
212 /// Data structure returned by the `lnk` module.
213 pub use super::protos::lnk::Lnk;
214
215 /// Data structures defined by the `macho` module.
216 ///
217 /// The main structure produced by the module is [`macho::Macho`]. The rest
218 /// of them are used by one or more fields in the main structure.
219 ///
220 pub use super::protos::macho;
221 /// Data structure returned by the `macho` module.
222 pub use super::protos::macho::Macho;
223
224 /// Data structures defined by the `pe` module.
225 ///
226 /// The main structure produced by the module is [`pe::PE`]. The rest
227 /// of them are used by one or more fields in the main structure.
228 ///
229 pub use super::protos::pe;
230 /// Data structure returned by the `pe` module.
231 pub use super::protos::pe::PE;
232
233 /// A data structure containing the data returned by all modules.
234 pub use super::protos::mods::Modules;
235
236 /// Invokes a YARA module with arbitrary data.
237 ///
238 /// <br>
239 ///
240 /// YARA modules typically parse specific file formats, returning structures
241 /// that contain information about the file. These structures are used in YARA
242 /// rules for expressing powerful and rich conditions. However, being able to
243 /// access this information outside YARA rules can also be beneficial.
244 ///
245 /// <br>
246 ///
247 /// This function allows the direct invocation of a YARA module for parsing
248 /// arbitrary data. It returns the structure produced by the module, which
249 /// depends upon the invoked module. The result will be [`None`] if the
250 /// module does not exist, or if it doesn't produce any information for
251 /// the input data.
252 ///
253 /// `T` must be one of the structure types returned by a YARA module, which
254 /// are defined in [`crate::mods`], like [`crate::mods::PE`], [`crate::mods::ELF`], etc.
255 ///
256 /// # Example
257 /// ```rust
258 /// # use yara_x;
259 /// let elf_info = yara_x::mods::invoke::<yara_x::mods::ELF>(&[]);
260 /// ```
261 pub fn invoke<T: protobuf::MessageFull>(data: &[u8]) -> Option<Box<T>> {
262 let module_output = invoke_dyn::<T>(data)?;
263 Some(<dyn protobuf::MessageDyn>::downcast_box(module_output).unwrap())
264 }
265
266 /// Like [`invoke`], but allows passing metadata to the module.
267 pub fn invoke_with_meta<T: protobuf::MessageFull>(
268 data: &[u8],
269 meta: Option<&[u8]>,
270 ) -> Option<Box<T>> {
271 let module_output = invoke_with_meta_dyn::<T>(data, meta)?;
272 Some(<dyn protobuf::MessageDyn>::downcast_box(module_output).unwrap())
273 }
274
275 /// Invokes a YARA module with arbitrary data, returning a dynamic
276 /// structure.
277 ///
278 /// This function is similar to [`invoke`] but its result is a dynamic-
279 /// dispatch version of the structure returned by the YARA module.
280 pub fn invoke_dyn<T: protobuf::MessageFull>(
281 data: &[u8],
282 ) -> Option<Box<dyn protobuf::MessageDyn>> {
283 invoke_with_meta_dyn::<T>(data, None)
284 }
285
286 /// Like [`invoke_dyn`], but allows passing metadata to the module.
287 pub fn invoke_with_meta_dyn<T: protobuf::MessageFull>(
288 data: &[u8],
289 meta: Option<&[u8]>,
290 ) -> Option<Box<dyn protobuf::MessageDyn>> {
291 let descriptor = T::descriptor();
292 let proto_name = descriptor.full_name();
293 let (_, module) =
294 super::BUILTIN_MODULES.iter().find(|(_, module)| {
295 module.root_struct_descriptor.full_name() == proto_name
296 })?;
297
298 module.main_fn?(data, meta).ok()
299 }
300
301 /// Invokes all YARA modules and returns the data produced by them.
302 ///
303 /// This function is similar to [`invoke`], but it returns the
304 /// information produced by all modules at once.
305 ///
306 /// # Example
307 /// ```rust
308 /// # use yara_x;
309 /// let modules_output = yara_x::mods::invoke_all(&[]);
310 /// ```
311 pub fn invoke_all(data: &[u8]) -> Box<Modules> {
312 let mut info = Box::new(Modules::new());
313 info.pe = protobuf::MessageField(invoke::<PE>(data));
314 info.elf = protobuf::MessageField(invoke::<ELF>(data));
315 info.dotnet = protobuf::MessageField(invoke::<Dotnet>(data));
316 info.macho = protobuf::MessageField(invoke::<Macho>(data));
317 info.lnk = protobuf::MessageField(invoke::<Lnk>(data));
318 info.crx = protobuf::MessageField(invoke::<Crx>(data));
319 info.dex = protobuf::MessageField(invoke::<Dex>(data));
320 info
321 }
322
323 /// Iterator over built-in module names.
324 ///
325 /// See the "debug modules" command.
326 pub fn module_names() -> impl Iterator<Item = &'static str> {
327 use itertools::Itertools;
328 super::BUILTIN_MODULES.keys().sorted_by_key(|k| **k).copied()
329 }
330
331 /// Returns the definition of the module with the given name.
332 pub fn module_definition(name: &str) -> Option<reflect::Struct> {
333 use crate::types;
334 use std::rc::Rc;
335 super::BUILTIN_MODULES
336 .get(name)
337 .map(|m| reflect::Struct::new(Rc::<types::Struct>::from(m)))
338 }
339
340 /// Types that allow for module introspection.
341 ///
342 /// This API is unstable and not ready for public use.
343 #[doc(hidden)]
344 pub mod reflect {
345 use std::borrow::Cow;
346 use std::rc::Rc;
347
348 use crate::types;
349 use crate::types::{Map, TypeValue};
350
351 /// Describes a structure or module.
352 #[derive(Clone, Debug, PartialEq)]
353 pub struct Struct {
354 inner: Rc<types::Struct>,
355 }
356
357 impl Struct {
358 pub(super) fn new(inner: Rc<types::Struct>) -> Self {
359 Self { inner }
360 }
361
362 /// Returns an iterator over the fields defined in the structure.
363 ///
364 /// The fields are sorted by name.
365 pub fn fields(&self) -> impl Iterator<Item = Field<'_>> + '_ {
366 self.inner
367 .fields()
368 .map(|(name, field)| Field::new(name, field))
369 }
370 }
371
372 /// Describes a function.
373 #[derive(Clone, Debug, PartialEq)]
374 pub struct Func {
375 /// All the existing signatures for this function. A function
376 /// can have multiple signatures that differ in their arguments
377 /// or return type.
378 pub signatures: Vec<FuncSignature>,
379 }
380
381 impl From<Rc<types::Func>> for Func {
382 fn from(func: Rc<types::Func>) -> Self {
383 let mut signatures =
384 Vec::with_capacity(func.signatures().len());
385
386 for signature in func.signatures() {
387 signatures.push(FuncSignature {
388 args: signature
389 .args
390 .iter()
391 .map(|(name, ty)| (name.clone(), Type::from(ty)))
392 .collect(),
393 ret: Type::from(&signature.result),
394 doc: signature.doc.clone(),
395 });
396 }
397
398 Func { signatures }
399 }
400 }
401
402 /// Describes a function signature.
403 #[derive(Clone, Debug, PartialEq)]
404 pub struct FuncSignature {
405 /// The names and types of the function arguments.
406 args: Vec<(String, Type)>,
407 /// The return type for the function.
408 ret: Type,
409 /// Function's documentation.
410 doc: Option<Cow<'static, str>>,
411 }
412
413 impl FuncSignature {
414 /// The names and types of the function arguments.
415 pub fn args(
416 &self,
417 ) -> impl ExactSizeIterator<Item = (&str, &Type)> {
418 self.args.iter().map(|(name, ty)| (name.as_str(), ty))
419 }
420
421 /// The return type for the function.
422 pub fn ret_type(&self) -> &Type {
423 &self.ret
424 }
425
426 /// Function's documentation.
427 pub fn doc(&self) -> Option<&str> {
428 self.doc.as_deref()
429 }
430 }
431
432 /// Describes a field within a structure or module.
433 #[derive(Clone)]
434 pub struct Field<'a> {
435 name: &'a str,
436 struct_field: &'a types::StructField,
437 }
438
439 impl<'a> Field<'a> {
440 fn new(
441 name: &'a str,
442 struct_field: &'a types::StructField,
443 ) -> Self {
444 Self { name, struct_field }
445 }
446
447 /// Returns the name of the field.
448 pub fn name(&self) -> &'a str {
449 self.name
450 }
451
452 /// Returns the type of the field.
453 pub fn ty(&self) -> Type {
454 Type::from(&self.struct_field.type_value)
455 }
456
457 /// Returns the documentation for the current field.
458 pub fn doc(&self) -> Option<&str> {
459 self.struct_field.doc.as_deref()
460 }
461 }
462
/// The type of a field, function argument or return value.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
    /// An integer.
    Integer,
    /// A float.
    Float,
    /// A boolean.
    Bool,
    /// A string.
    String,
    /// A regular expression.
    Regexp,
    /// A structure.
    Struct(Struct),
    /// An array. The inner type is obtained from the array's deputy value
    /// (presumably the type of the array items -- confirm against
    /// `types::Array`).
    Array(Box<Type>),
    /// A map. The first type is the type of the keys (either
    /// [`Type::Integer`] or [`Type::String`]), the second one is obtained
    /// from the map's deputy value (presumably the type of the map values --
    /// confirm against `types::Map`).
    Map(Box<Type>, Box<Type>),
    /// A function.
    Func(Func),
}
485
486 impl From<&TypeValue> for Type {
487 fn from(type_value: &TypeValue) -> Self {
488 match type_value {
489 TypeValue::Bool { .. } => Type::Bool,
490 TypeValue::Float { .. } => Type::Float,
491 TypeValue::Integer { .. } => Type::Integer,
492 TypeValue::String { .. } => Type::String,
493 TypeValue::Regexp(_) => Type::Regexp,
494 TypeValue::Struct(s) => {
495 Type::Struct(Struct::new(s.clone()))
496 }
497 TypeValue::Array(a) => {
498 Type::Array(Box::new(Type::from(&a.deputy())))
499 }
500 TypeValue::Map(m) => {
501 let key_kind = match **m {
502 Map::IntegerKeys { .. } => Type::Integer,
503 Map::StringKeys { .. } => Type::String,
504 };
505 Type::Map(
506 Box::new(key_kind),
507 Box::new(Type::from(&m.deputy())),
508 )
509 }
510 TypeValue::Func(func) => Type::Func(func.clone().into()),
511 TypeValue::Unknown => unreachable!(),
512 }
513 }
514 }
515 }
516}
517
518#[cfg(feature = "crypto")]
519pub(crate) mod utils;