1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
use protobuf::MessageDyn;
use protobuf::reflect::MessageDescriptor;
use thiserror::Error;
/// Message types for the modules, pulled in from a `protos/.../mod.rs` file.
///
/// Exactly one of the two `include!` lines below is compiled, depending on
/// whether the `generate-proto-code` feature is enabled.
pub mod protos {
// Feature enabled: use the `protos/mod.rs` found under the build's `OUT_DIR`.
// NOTE(review): presumably written there by a build script — confirm.
#[cfg(feature = "generate-proto-code")]
include!(concat!(env!("OUT_DIR"), "/protos/mod.rs"));
// Feature disabled: use `protos/generated/mod.rs`, resolved relative to this
// source file.
#[cfg(not(feature = "generate-proto-code"))]
include!("protos/generated/mod.rs");
}
// Test submodule, compiled only for test builds.
#[cfg(test)]
mod tests;
// Crate-internal `field_docs` submodule.
pub(crate) mod field_docs;
// Textually includes `modules.rs` into this module.
// NOTE(review): presumably this file declares the individual module
// implementations — confirm against the build setup.
include!("modules.rs");
/// Errors that a module's main function can report.
///
/// The enum is `#[non_exhaustive]`, so code in other crates that matches on
/// it must include a wildcard arm.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ModuleError {
/// The metadata passed to the module has an invalid format.
///
/// Displayed as `invalid metadata: <err>`.
#[error("invalid metadata: {err}")]
MetadataError {
/// Description of the error that actually occurred.
err: String,
},
/// An error occurred while processing the input data.
///
/// Displayed as `internal error: <err>`.
#[error("internal error: {err}")]
InternalError {
/// Description of the error that actually occurred.
err: String,
},
}
/// The trait implemented by all registered modules.
///
/// Registered modules are handled as `&'static dyn RegisteredModule` trait
/// objects, and every implementor must be `Send + Sync`.
pub trait RegisteredModule: Send + Sync {
/// Name used for the module in `import` statements (e.g. `"my_module"`).
fn name(&self) -> &'static str;
/// Returns the descriptor of the protobuf message that defines the
/// module's root structure.
fn root_descriptor(&self) -> MessageDescriptor;
/// Runs the module's main function, the one called every time YARA scans
/// some data, before evaluating the rules.
///
/// `data` is the data handed to the module and `meta` is optional metadata
/// for it.
///
/// Returns `None` for data-only modules (those without a main function).
/// Otherwise returns `Some` with the main function's result: the produced
/// message on success, or a [`ModuleError`] on failure.
fn main_fn(
&self,
data: &[u8],
meta: Option<&[u8]>,
) -> Option<Result<Box<dyn MessageDyn>, ModuleError>>;
/// Rust module path of the submodule inside the external crate that
/// contains functions registered with `#[module_export(yara_x_crate = ...)]`.
///
/// Must match the value that `module_path!()` expands to at those
/// functions' definition site (e.g. `"my_crate::my_mod"`). Returns
/// `None` for data-only modules that export no callable functions.
fn rust_module_name(&self) -> Option<&'static str>;
}
/// Signature of the main function in a YARA module.
///
/// The function receives the input data and optional metadata, and returns
/// either the module's output of type `T` or a [`ModuleError`].
pub type ModuleMainFn<T> = fn(&[u8], Option<&[u8]>) -> Result<T, ModuleError>;
/// Description of a YARA module, generic over the type `T` returned by the
/// main function.
///
/// `T` is the protobuf message type that defines the module's root structure;
/// its descriptor is what the [`RegisteredModule`] implementation for this
/// type reports as the root descriptor.
pub struct Module<T>
where
T: protobuf::MessageFull + 'static,
{
/// Name used for the module in `import` statements (e.g. `"my_module"`).
pub name: &'static str,
/// Main function called every time YARA scans some data, before
/// evaluating the rules. Set to `None` for data-only modules.
pub main_fn: Option<ModuleMainFn<T>>,
/// Rust module path of the submodule inside the external crate that
/// contains functions registered with `#[module_export(yara_x_crate = ...)]`.
/// Set to `None` when the module exports no callable functions.
pub rust_module_name: Option<&'static str>,
}
/// Exposes the plain data stored in a [`Module`] through the object-safe
/// [`RegisteredModule`] interface.
impl<T> RegisteredModule for Module<T>
where
    T: protobuf::MessageFull + 'static,
{
    /// Returns the `name` field unchanged.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the descriptor of `T`, the module's root message type.
    fn root_descriptor(&self) -> MessageDescriptor {
        T::descriptor()
    }

    /// Runs the stored main function, if any, boxing its output as a
    /// `dyn MessageDyn`.
    ///
    /// Returns `None` when the module has no main function. Errors returned
    /// by the main function are passed through untouched.
    fn main_fn(
        &self,
        data: &[u8],
        meta: Option<&[u8]>,
    ) -> Option<Result<Box<dyn MessageDyn>, ModuleError>> {
        // Data-only modules carry no main function: nothing to run.
        let run = self.main_fn?;
        let outcome = run(data, meta)
            .map(|message| Box::new(message) as Box<dyn MessageDyn>);
        Some(outcome)
    }

    /// Returns the `rust_module_name` field unchanged.
    fn rust_module_name(&self) -> Option<&'static str> {
        self.rust_module_name
    }
}
/// Macro used to register a YARA module.
///
/// The macro submits a `Module::<$root_message>` value to `inventory` as a
/// `&dyn RegisteredModule`. It accepts two forms:
///
/// * `register_module!(name, RootMessage, main_fn)`: the module gets
///   `main_fn` as its main function, and its `rust_module_name` is set to
///   whatever `module_path!()` yields where the macro is expanded.
/// * `register_module!(name, RootMessage)`: a data-only module; both
///   `main_fn` and `rust_module_name` are `None`.
///
/// `name` must be a literal, `RootMessage` a type and `main_fn` a path.
///
/// # Examples
///
/// Registering a module with a main function:
///
/// ```ignore
/// register_module!("my_module", MyModuleProto, main);
/// ```
///
/// Registering a data-only module with no main function:
///
/// ```ignore
/// register_module!("my_module", MyModuleProto);
/// ```
#[macro_export]
macro_rules! register_module {
// Module with a main function.
($name:literal, $root_message:ty, $main_fn:path) => {
$crate::mods::prelude::inventory::submit! {
&$crate::mods::prelude::Module::<$root_message> {
name: $name,
main_fn: Some($main_fn),
rust_module_name: Some(module_path!()),
} as &dyn $crate::mods::prelude::RegisteredModule
}
};
// Data-only module: no main function and no exported functions.
($name:literal, $root_message:ty) => {
$crate::mods::prelude::inventory::submit! {
&$crate::mods::prelude::Module::<$root_message> {
name: $name,
main_fn: None,
rust_module_name: None,
} as &dyn $crate::mods::prelude::RegisteredModule
}
};
}
// Declares `&'static dyn RegisteredModule` as a type collected by `inventory`;
// the values submitted by `register_module!` are of this type.
inventory::collect!(&'static dyn RegisteredModule);
/// Iterates over every module submitted to the `inventory` registry.
///
/// Each item is a `&'static dyn RegisteredModule` copied out of the registry
/// entries.
#[inline]
pub(crate) fn registered_modules()
-> impl Iterator<Item = &'static dyn RegisteredModule> {
    let registry = inventory::iter::<&'static dyn RegisteredModule>();
    registry.copied()
}
pub mod mods {
/*! Utility functions and structures that allow invoking YARA modules directly.
The utility functions [`invoke`], [`invoke_dyn`] and [`invoke_all`]
allow leveraging YARA modules for parsing some file formats independently
of any YARA rule. With these functions you can pass arbitrary data to a
YARA module and obtain the same data structure that is accessible to YARA
rules and which you use in your rule conditions.
This allows external projects to benefit from YARA's file-parsing
capabilities for their own purposes.
# Example
```rust
# use yara_x;
let pe_info = yara_x::mods::invoke::<yara_x::mods::PE>(&[]);
```
*/
/// Data structures defined by the `crx` module.
///
/// The main structure produced by the module is [`crx::Crx`]. The rest
/// of them are used by one or more fields in the main structure.
///
pub use super::protos::crx;
/// Data structure returned by the `crx` module.
pub use super::protos::crx::Crx;
/// Data structures defined by the `dex` module.
///
/// The main structure produced by the module is [`dex::Dex`]. The rest
/// of them are used by one or more fields in the main structure.
///
pub use super::protos::dex;
/// Data structure returned by the `dex` module.
pub use super::protos::dex::Dex;
/// Data structures defined by the `dotnet` module.
///
/// The main structure produced by the module is [`dotnet::Dotnet`]. The
/// rest of them are used by one or more fields in the main structure.
///
pub use super::protos::dotnet;
/// Data structure returned by the `dotnet` module.
pub use super::protos::dotnet::Dotnet;
/// Data structures defined by the `elf` module.
///
/// The main structure produced by the module is [`elf::ELF`]. The rest of
/// them are used by one or more fields in the main structure.
///
pub use super::protos::elf;
/// Data structure returned by the `elf` module.
pub use super::protos::elf::ELF;
/// Data structures defined by the `lnk` module.
///
/// The main structure produced by the module is [`lnk::Lnk`]. The rest of
/// them are used by one or more fields in the main structure.
///
pub use super::protos::lnk;
/// Data structure returned by the `lnk` module.
pub use super::protos::lnk::Lnk;
/// Data structures defined by the `macho` module.
///
/// The main structure produced by the module is [`macho::Macho`]. The rest
/// of them are used by one or more fields in the main structure.
///
pub use super::protos::macho;
/// Data structure returned by the `macho` module.
pub use super::protos::macho::Macho;
/// Data structures defined by the `pe` module.
///
/// The main structure produced by the module is [`pe::PE`]. The rest
/// of them are used by one or more fields in the main structure.
///
pub use super::protos::pe;
/// Data structure returned by the `pe` module.
pub use super::protos::pe::PE;
/// A data structure containing the data returned by all modules.
pub use super::protos::mods::Modules;
/// Runs a YARA module on arbitrary data and returns its typed output.
///
/// YARA modules typically parse specific file formats and return structures
/// describing the file. Those structures are what YARA rules use in their
/// conditions, but they can be just as useful outside of any rule. This
/// function invokes a module directly and hands back the structure it
/// produces, whose concrete type depends on the module.
///
/// `T` must be one of the structure types returned by a YARA module, which
/// are defined in [`crate::mods`], like [`crate::mods::PE`],
/// [`crate::mods::ELF`], etc.
///
/// The result is [`None`] if the module does not exist, or if it doesn't
/// produce any information for the input data.
///
/// # Panics
///
/// Panics if the value produced by the module cannot be downcast to `T`.
///
/// # Example
/// ```rust
/// # use yara_x;
/// let elf_info = yara_x::mods::invoke::<yara_x::mods::ELF>(&[]);
/// ```
pub fn invoke<T: protobuf::MessageFull>(data: &[u8]) -> Option<Box<T>> {
    invoke_dyn::<T>(data).map(|dyn_output| {
        <dyn protobuf::MessageDyn>::downcast_box::<T>(dyn_output).unwrap()
    })
}
/// Same as [`invoke`], with optional metadata forwarded to the module.
///
/// Returns [`None`] whenever [`invoke_with_meta_dyn`] does.
///
/// # Panics
///
/// Panics if the value produced by the module cannot be downcast to `T`.
pub fn invoke_with_meta<T: protobuf::MessageFull>(
    data: &[u8],
    meta: Option<&[u8]>,
) -> Option<Box<T>> {
    invoke_with_meta_dyn::<T>(data, meta).map(|dyn_output| {
        <dyn protobuf::MessageDyn>::downcast_box::<T>(dyn_output).unwrap()
    })
}
/// Runs a YARA module on arbitrary data and returns its output as a
/// dynamically-typed message.
///
/// This is the counterpart of [`invoke`] that skips the downcast: the result
/// is the `Box<dyn MessageDyn>` produced by the module. No metadata is passed
/// to the module.
pub fn invoke_dyn<T: protobuf::MessageFull>(
    data: &[u8],
) -> Option<Box<dyn protobuf::MessageDyn>> {
    let no_meta = None;
    invoke_with_meta_dyn::<T>(data, no_meta)
}
/// Same as [`invoke_dyn`], with optional metadata forwarded to the module.
///
/// The module is looked up among the registered modules by comparing the
/// full name of its root message descriptor with the full name of `T`'s
/// descriptor; the first match is used.
///
/// Returns [`None`] when no registered module matches, when the matching
/// module has no main function, or when its main function returns an error
/// (the error itself is discarded).
pub fn invoke_with_meta_dyn<T: protobuf::MessageFull>(
    data: &[u8],
    meta: Option<&[u8]>,
) -> Option<Box<dyn protobuf::MessageDyn>> {
    let wanted = T::descriptor();
    let wanted_name = wanted.full_name();
    super::registered_modules()
        .find(|candidate| candidate.root_descriptor().full_name() == wanted_name)
        .and_then(|module| module.main_fn(data, meta))
        .and_then(Result::ok)
}
/// Runs the `pe`, `elf`, `dotnet`, `macho`, `lnk`, `crx` and `dex` modules on
/// the given data and gathers their outputs in a single [`Modules`] message.
///
/// This is like calling [`invoke`] once per module: a field is left unset
/// when [`invoke`] returns [`None`] for the corresponding module.
///
/// # Example
/// ```rust
/// # use yara_x;
/// let modules_output = yara_x::mods::invoke_all(&[]);
/// ```
pub fn invoke_all(data: &[u8]) -> Box<Modules> {
    let mut modules = Modules::new();

    modules.pe = protobuf::MessageField(invoke::<PE>(data));
    modules.elf = protobuf::MessageField(invoke::<ELF>(data));
    modules.dotnet = protobuf::MessageField(invoke::<Dotnet>(data));
    modules.macho = protobuf::MessageField(invoke::<Macho>(data));
    modules.lnk = protobuf::MessageField(invoke::<Lnk>(data));
    modules.crx = protobuf::MessageField(invoke::<Crx>(data));
    modules.dex = protobuf::MessageField(invoke::<Dex>(data));

    Box::new(modules)
}
/// Iterator over all registered module names.
///
/// See the "debug modules" command.
pub fn module_names() -> impl Iterator<Item = &'static str> {
use itertools::Itertools;
super::registered_modules().map(|m| m.name()).sorted()
}
/// Looks up a registered module by name and returns its definition.
///
/// The definition is built by converting the module into a
/// `crate::types::Struct` and wrapping it in a [`reflect::Struct`]. Returns
/// [`None`] if no registered module has the given name.
pub fn module_definition(name: &str) -> Option<reflect::Struct> {
    use std::rc::Rc;

    let module = super::registered_modules().find(|m| m.name() == name)?;
    let definition = Rc::<crate::types::Struct>::from(module);
    Some(reflect::Struct::new(definition))
}
/// Everything needed to implement your own YARA-X modules.
///
/// Meant to be glob-imported, i.e. `use yara_x::mods::prelude::*;`, as shown
/// in the `module_export` example further down.
#[allow(unused_imports)]
#[allow(missing_docs)]
pub mod prelude {
// Module description types and the registration macro.
pub use crate::modules::Module;
pub use crate::modules::ModuleError;
pub use crate::modules::RegisteredModule;
pub use crate::register_module;
// Items re-exported from the crate's `wasm` module.
pub use crate::wasm::runtime::Caller;
pub use crate::wasm::string::FixedLenString;
pub use crate::wasm::string::RuntimeString;
// Imported anonymously (`as _`): the item is brought into scope without
// binding the name `String`. NOTE(review): presumably a trait whose methods
// should be callable by module code — confirm.
pub use crate::wasm::string::String as _;
pub use crate::wasm::string::{Lowercase, Uppercase};
pub use crate::wasm::*;
pub use bstr::ByteSlice;
// `register_module!` expands to `$crate::mods::prelude::inventory::submit!`,
// so `inventory` must be reachable through this module.
pub use inventory;
pub use protobuf::MessageFull;
pub use yara_x_macros::wasm_export;
/// Opaque scan context passed as first argument to functions exported from a
/// [`Module`] via `#[module_export]`.
///
/// Functions only receive a reference to it; all fields are private.
pub type ScanContext<'r, 'd> = crate::scanner::ScanContext<'r, 'd>;
/// Attribute macro for exporting a callable function from a [`Module`].
///
/// ```ignore
/// use yara_x::mods::prelude::*;
/// #[module_export]
/// fn add(_ctx: &ScanContext, a: i64, b: i64) -> i64 { a + b }
/// ```
pub use yara_x_macros::module_export;
}
/// Types that allow for module introspection.
///
/// This API is unstable and not ready for public use.
#[doc(hidden)]
pub mod reflect {
use std::borrow::Cow;
use std::rc::Rc;
use crate::types;
use crate::types::{Map, TypeValue};
/// Describes a structure or module.
///
/// This is a thin wrapper around a reference-counted `types::Struct`;
/// cloning it clones the `Rc`, not the underlying structure.
#[derive(Clone, Debug, PartialEq)]
pub struct Struct {
// The crate-internal structure being described.
inner: Rc<types::Struct>,
}
impl Struct {
pub(super) fn new(inner: Rc<types::Struct>) -> Self {
Self { inner }
}
/// Returns an iterator over the fields defined in the structure.
///
/// The fields are sorted by name.
pub fn fields(&self) -> impl Iterator<Item = Field<'_>> + '_ {
self.inner
.fields()
.map(|(name, field)| Field::new(name, field))
}
}
/// Describes a function as the list of signatures it can be called with.
#[derive(Clone, Debug, PartialEq)]
pub struct Func {
/// All the existing signatures for this function. A function
/// can have multiple signatures that differ in their arguments
/// or return type.
pub signatures: Vec<FuncSignature>,
}
/// Builds the reflection view of a crate-internal function.
impl From<Rc<types::Func>> for Func {
    /// Converts every signature of `func`, keeping their order: argument
    /// names are cloned, argument and result types are mapped to [`Type`],
    /// and the documentation is cloned as is.
    fn from(func: Rc<types::Func>) -> Self {
        let signatures = func
            .signatures()
            .into_iter()
            .map(|signature| FuncSignature {
                args: signature
                    .args
                    .iter()
                    .map(|(name, ty)| (name.clone(), Type::from(ty)))
                    .collect(),
                ret: Type::from(&signature.result),
                doc: signature.doc.clone(),
            })
            .collect();

        Self { signatures }
    }
}
/// Describes a function signature: its arguments, return type and
/// documentation.
///
/// The fields are private; use the accessor methods to read them.
#[derive(Clone, Debug, PartialEq)]
pub struct FuncSignature {
/// The names and types of the function arguments.
args: Vec<(String, Type)>,
/// The return type for the function.
ret: Type,
/// Function's documentation, if any.
doc: Option<Cow<'static, str>>,
}
impl FuncSignature {
    /// Iterates over the function arguments as `(name, type)` pairs, in
    /// declaration order.
    pub fn args(
        &self,
    ) -> impl ExactSizeIterator<Item = (&str, &Type)> {
        self.args.iter().map(|arg| (arg.0.as_str(), &arg.1))
    }

    /// The type of the value returned by the function.
    pub fn ret_type(&self) -> &Type {
        &self.ret
    }

    /// The function's documentation, or `None` if it has none.
    pub fn doc(&self) -> Option<&str> {
        let doc = &self.doc;
        doc.as_deref()
    }
}
/// Describes a field within a structure or module.
///
/// A `Field` only borrows its data: both the name and the field description
/// are references with lifetime `'a`.
#[derive(Clone)]
pub struct Field<'a> {
// Name of the field.
name: &'a str,
// Crate-internal description of the field (type and documentation).
struct_field: &'a types::StructField,
}
impl<'a> Field<'a> {
fn new(
name: &'a str,
struct_field: &'a types::StructField,
) -> Self {
Self { name, struct_field }
}
/// Returns the name of the field.
pub fn name(&self) -> &'a str {
self.name
}
/// Returns the type of the field.
pub fn ty(&self) -> Type {
Type::from(&self.struct_field.type_value)
}
/// Returns the documentation for the current field.
pub fn doc(&self) -> Option<&str> {
self.struct_field.doc.as_deref()
}
}
/// The type of a field, function argument or return value.
#[derive(Clone, Debug, PartialEq)]
pub enum Type {
/// An integer.
Integer,
/// A float.
Float,
/// A boolean.
Bool,
/// A string.
String,
/// A regular expression.
Regexp,
/// A structure, together with its description.
Struct(Struct),
/// An array; the boxed type is the type of its items.
Array(Box<Type>),
/// A map; the boxed types are the key type and the value type, in that
/// order.
Map(Box<Type>, Box<Type>),
/// A function, together with its signatures.
Func(Func),
}
/// Maps a crate-internal `TypeValue` to its public reflection [`Type`].
impl From<&TypeValue> for Type {
    /// Scalars map one-to-one. Arrays and maps are converted recursively
    /// from their `deputy()` value, and map keys become either
    /// [`Type::Integer`] or [`Type::String`].
    ///
    /// # Panics
    ///
    /// Panics if `type_value` is `TypeValue::Unknown`.
    fn from(type_value: &TypeValue) -> Self {
        match type_value {
            TypeValue::Integer { .. } => Self::Integer,
            TypeValue::Float { .. } => Self::Float,
            TypeValue::Bool { .. } => Self::Bool,
            TypeValue::String { .. } => Self::String,
            TypeValue::Regexp(_) => Self::Regexp,
            TypeValue::Struct(inner) => Self::Struct(Struct::new(inner.clone())),
            TypeValue::Array(array) => {
                let item = Self::from(&array.deputy());
                Self::Array(Box::new(item))
            }
            TypeValue::Map(map) => {
                let key = match **map {
                    Map::IntegerKeys { .. } => Self::Integer,
                    Map::StringKeys { .. } => Self::String,
                };
                let value = Self::from(&map.deputy());
                Self::Map(Box::new(key), Box::new(value))
            }
            TypeValue::Func(func) => Self::Func(func.clone().into()),
            // Unknown types are not expected to reach the reflection API.
            TypeValue::Unknown => unreachable!(),
        }
    }
}
}
}
// Crate-internal `utils` submodule, compiled only when the `crypto` feature
// is enabled.
#[cfg(feature = "crypto")]
pub(crate) mod utils;