// boreal_parser/expression/mod.rs

1//! Types related to the condition part of YARA rules.
2use std::ops::Range;
3
4mod boolean_expression;
5mod common;
6mod for_expression;
7mod identifier;
8mod primary_expression;
9mod read_integer;
10mod string_expression;
11
12use crate::regex::Regex;
13
14pub(crate) use boolean_expression::boolean_expression as expression;
15
/// Integer read type, see [`ExpressionKind::ReadInteger`].
///
/// Each variant selects the width and signedness of the integer to read.
/// Variants with a `BE` suffix are big-endian.
///
/// NOTE(review): the variants without a `BE` suffix are presumably
/// little-endian, as for YARA's `int16`/`uint32` functions -- confirm.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ReadIntegerType {
    /// 8 bits, signed
    Int8,
    /// 8 bits, unsigned
    Uint8,
    /// 16 bits, signed
    Int16,
    /// 16 bits, signed, big-endian
    Int16BE,
    /// 16 bits, unsigned
    Uint16,
    /// 16 bits, unsigned, big-endian
    Uint16BE,
    /// 32 bits, signed
    Int32,
    /// 32 bits, signed, big-endian
    Int32BE,
    /// 32 bits, unsigned
    Uint32,
    /// 32 bits, unsigned, big-endian
    Uint32BE,
}
40
41/// Parsed identifier used in expressions.
42#[derive(Clone, Debug, PartialEq)]
43pub struct Identifier {
44    /// Name of the identifier
45    pub name: String,
46
47    /// Span covering the name of the identifier.
48    pub name_span: Range<usize>,
49
50    /// Operations on the identifier, stored in the order of operations.
51    ///
52    /// For example, `pe.sections[2].name` would give `pe` for the name, and
53    /// `[Subfield("sections"), Subscript(Expr::Integer(2)), Subfield("name")]` for the operations.
54    pub operations: Vec<IdentifierOperation>,
55}
56
57/// Operation applied on an identifier.
58#[derive(Clone, Debug, PartialEq)]
59pub struct IdentifierOperation {
60    /// Type of the operation
61    pub op: IdentifierOperationType,
62
63    /// Span covering the operation
64    pub span: Range<usize>,
65}
66
67/// Type of operation applied on an identifier.
68#[derive(Clone, Debug, PartialEq)]
69pub enum IdentifierOperationType {
70    /// Array subscript, i.e. `identifier[subscript]`.
71    Subscript(Box<Expression>),
72    /// Object subfield, i.e. `identifier.subfield`.
73    Subfield(String),
74    /// Function call, i.e. `identifier(arguments)`.
75    FunctionCall(Vec<Expression>),
76}
77
78/// An expression parsed in a Rule.
79#[derive(Clone, Debug, PartialEq)]
80pub enum ExpressionKind {
81    /// Size of the file being scanned.
82    Filesize,
83
84    /// Entrypoint of the file being scanned, if it is a PE or ELF.
85    ///
86    /// Deprecated, use the `pe` or `elf` module instead.
87    Entrypoint,
88
89    /// An integer read at a given address.
90    ///
91    /// See the yara documentation on `int8`, `uint16be` etc.
92    ReadInteger {
93        /// Which size and endianness to read.
94        ty: ReadIntegerType,
95        /// Address/Offset of the input where to read.
96        addr: Box<Expression>,
97    },
98
99    /// A i64 value.
100    Integer(i64),
101
102    /// A f64 floating-point value.
103    Double(f64),
104
105    /// Count number of matches on a given variable.
106    Count(String),
107
108    /// Count number of matches on a given variable in a specific range of the input.
109    CountInRange {
110        /// Name of the variable being counted
111        variable_name: String,
112        /// Span for the name of the variable
113        variable_name_span: Range<usize>,
114        /// Starting offset, included.
115        from: Box<Expression>,
116        /// Ending offset, included.
117        to: Box<Expression>,
118    },
119
120    /// Offset of a variable match
121    Offset {
122        /// Name of the variable
123        variable_name: String,
124
125        /// Occurrence number.
126        ///
127        /// `1` is the first match on the variable, `2` is the next one, etc.
128        occurence_number: Box<Expression>,
129    },
130
131    /// Length of a variable match
132    Length {
133        /// Name of the variable
134        variable_name: String,
135
136        /// Occurrence number.
137        ///
138        /// `1` is the first match on the variable, `2` is the next one, etc.
139        occurence_number: Box<Expression>,
140    },
141
142    /// Opposite value, for integers and floats.
143    Neg(Box<Expression>),
144
145    /// Addition, for integers and floats.
146    Add(Box<Expression>, Box<Expression>),
147    /// Substraction, for integers and floats.
148    Sub(Box<Expression>, Box<Expression>),
149    /// Multiplication, for integers and floats.
150    Mul(Box<Expression>, Box<Expression>),
151    /// Division, for integers and floats.
152    Div(Box<Expression>, Box<Expression>),
153
154    /// Modulo, for integers.
155    Mod(Box<Expression>, Box<Expression>),
156
157    /// Bitwise xor, for integers.
158    BitwiseXor(Box<Expression>, Box<Expression>),
159    /// Bitwise and, for integers.
160    BitwiseAnd(Box<Expression>, Box<Expression>),
161    /// Bitwise or, for integers.
162    BitwiseOr(Box<Expression>, Box<Expression>),
163
164    /// Bitwise negation, for integers.
165    BitwiseNot(Box<Expression>),
166
167    /// Shift left, both elements must be integers.
168    ShiftLeft(Box<Expression>, Box<Expression>),
169    /// Shift right, both elements must be integers.
170    ShiftRight(Box<Expression>, Box<Expression>),
171
172    /// Boolean and operation.
173    And(Vec<Expression>),
174    /// Boolean or operation.
175    Or(Vec<Expression>),
176
177    /// Boolean negation.
178    Not(Box<Expression>),
179
180    /// Comparison.
181    ///
182    /// Integers and floats can be compared to integers and floats.
183    /// Strings can be compared to strings.
184    Cmp {
185        /// Left operand.
186        left: Box<Expression>,
187        /// Right operand.
188        right: Box<Expression>,
189        /// If true this is '<', otherwise '>'
190        less_than: bool,
191        /// If true, left == right returns true.
192        can_be_equal: bool,
193    },
194
195    /// Equal
196    Eq(Box<Expression>, Box<Expression>),
197
198    /// Not equal
199    NotEq(Box<Expression>, Box<Expression>),
200
201    /// Does a string contains another string
202    Contains {
203        /// String to search in
204        haystack: Box<Expression>,
205        /// String to search
206        needle: Box<Expression>,
207        /// If true, the search is case insensitive.
208        case_insensitive: bool,
209    },
210
211    /// Does a string starts with another string
212    StartsWith {
213        /// String to search in
214        expr: Box<Expression>,
215        /// Prefix to search
216        prefix: Box<Expression>,
217        /// If true, the search is case insensitive.
218        case_insensitive: bool,
219    },
220
221    /// Does a string ends with another string
222    EndsWith {
223        /// String to search in
224        expr: Box<Expression>,
225        /// Prefix to search
226        suffix: Box<Expression>,
227        /// If true, the search is case insensitive.
228        case_insensitive: bool,
229    },
230
231    /// Case insensitive equality test. Both elements must be strings.
232    IEquals(Box<Expression>, Box<Expression>),
233
234    /// Does a string matches a regex.
235    Matches(Box<Expression>, Regex),
236
237    /// Is a given value defined.
238    ///
239    /// For example, `defined filesize` will be true when scanning a file,
240    /// false otherwise.
241    Defined(Box<Expression>),
242
243    /// A boolean value.
244    Boolean(bool),
245
246    /// Does a variable matches
247    Variable(String),
248
249    /// Does a variable matches at a given offset.
250    VariableAt {
251        /// Name of the variable
252        variable_name: String,
253        /// Span for the name of the variable
254        variable_name_span: Range<usize>,
255        /// Offset
256        offset: Box<Expression>,
257    },
258
259    /// Does a variable matches in a given offset range.
260    VariableIn {
261        /// Name of the variable.
262        variable_name: String,
263        /// Span for the name of the variable
264        variable_name_span: Range<usize>,
265        /// Starting offset, included.
266        from: Box<Expression>,
267        /// Ending offset, included.
268        to: Box<Expression>,
269    },
270
271    /// Evaluate multiple variables on a given expression.
272    ///
273    /// For each variable in `set`, evaluate `body`.
274    /// Then, if the number of evaluations returning true
275    /// matches the `selection`, then this expression returns true.
276    For {
277        /// How many variables must match for this expression to be true.
278        selection: ForSelection,
279
280        /// Which variables to select.
281        set: VariableSet,
282
283        /// `ParsedExpr` to evaluate for each variable.
284        ///
285        /// The body can contain `$`, `#`, `@` or `!` to refer to the
286        /// currently selected variable.
287        ///
288        /// If unset, this is equivalent to `$`, i.e. true if the selected
289        /// variable matches.
290        body: Option<Box<Expression>>,
291    },
292
293    /// Evaluate the presence of multiple variables in a given range.
294    ///
295    /// This is equivalent to a [`Self::For`] value, with a body
296    /// set to `$ in (from..to)`.
297    ForIn {
298        /// How many variables must match for this expresion to be true.
299        selection: ForSelection,
300        /// Which variables to select.
301        set: VariableSet,
302        /// Starting offset, included.
303        from: Box<Expression>,
304        /// Ending offset, included.
305        to: Box<Expression>,
306    },
307
308    /// Evaluate the presence of multiple variables at a given offset.
309    ///
310    /// This is equivalent to a [`Self::For`] value, with a body
311    /// set to `$ at expr`.
312    ForAt {
313        /// How many variables must match for this expresion to be true.
314        selection: ForSelection,
315        /// Which variables to select.
316        set: VariableSet,
317        /// Offset of the variable match.
318        offset: Box<Expression>,
319    },
320
321    /// Evaluate an identifier with multiple values on a given expression.
322    ///
323    /// Same as [`Self::For`], but instead of binding a variable,
324    /// an identifier is bounded to multiple values.
325    ///
326    /// For example: `for all i in (0..#a): ( @a[i] < 100 )`
327    ForIdentifiers {
328        /// How many times the body must evaluate to true for this expresion
329        /// to be true.
330        selection: ForSelection,
331
332        /// List of identifiers to bind.
333        ///
334        /// This is a list because the values bounded can be complex, ie
335        /// arrays or dictionaries. This list is the same length as the
336        /// cardinality of the values in the iterator.
337        identifiers: Vec<String>,
338
339        /// Span covering the identifiers declaration
340        identifiers_span: Range<usize>,
341
342        /// Values to bind to the identifiers.
343        iterator: ForIterator,
344
345        /// Span covering the iterator
346        iterator_span: Range<usize>,
347
348        /// Body to evaluate for each binding.
349        body: Box<Expression>,
350    },
351
352    /// Depend on multiple rules already declared in the namespace.
353    ///
354    /// If the number of matching rules in the set matches the `selection`,
355    /// this expression returns true.
356    ForRules {
357        /// How many variables must match for this expression to be true.
358        selection: ForSelection,
359
360        /// Which rules are selected.
361        set: RuleSet,
362    },
363
364    /// An identifier.
365    Identifier(Identifier),
366    /// A byte string.
367    Bytes(Vec<u8>),
368    /// A regex.
369    Regex(Regex),
370}
371
372/// Selection of variables in a 'for' expression.
373///
374/// This indicates how many variables must match the for condition
375/// for it to be considered true.
376#[derive(Clone, Debug, PartialEq)]
377pub enum ForSelection {
378    /// Any variable in the set must match the condition.
379    Any,
380    /// All of the variables in the set must match the condition.
381    All,
382    /// None of the variables in the set must match the condition.
383    None,
384    /// `ParsedExpr` that should evaluate to a number, indicating:
385    /// - if `as_percent` is false, how many variables in the set must match
386    ///   the condition.
387    /// - if `as_percent` is true, which percentage of variables in the set
388    ///   msut match the condition.
389    ///   the condition.
390    ///
391    /// Usually, the expression is a simple number.
392    Expr {
393        /// Number of variables selected
394        expr: Box<Expression>,
395        /// Should the number be a percentage.
396        as_percent: bool,
397    },
398}
399
400/// Iterator for a 'for' expression over an identifier.
401#[derive(Clone, Debug, PartialEq)]
402pub enum ForIterator {
403    /// Identifier to pick values from.
404    Identifier(Identifier),
405    /// Every value between two numbers
406    Range {
407        /// Start of the range, included
408        from: Box<Expression>,
409        /// End of the range, included
410        to: Box<Expression>,
411    },
412    /// List of values
413    List(Vec<Expression>),
414}
415
416/// Set of multiple variables.
417#[derive(Clone, Debug, PartialEq, Eq)]
418pub struct VariableSet {
419    /// Names of the variables in the set.
420    ///
421    /// If empty, the set is considered as containing *all* variables.
422    pub elements: Vec<SetElement>,
423}
424
/// Element of a set.
///
/// An element is a name, optionally followed by a wildcard.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SetElement {
    /// Name of the element.
    pub name: String,

    /// Is the name a wildcard, i.e. the element is `name*`.
    pub is_wildcard: bool,

    /// Span for the element.
    pub span: Range<usize>,
}
437
438/// Set of multiple rules.
439#[derive(Clone, Debug, PartialEq, Eq)]
440pub struct RuleSet {
441    /// Names of the rules in the set.
442    ///
443    /// The associated boolean indicates if the name has a trailing
444    /// wildcard.
445    pub elements: Vec<SetElement>,
446}
447
448/// A parsed expression with associated span
449#[derive(Clone, Debug, PartialEq)]
450pub struct Expression {
451    /// Kind of the expression.
452    pub expr: ExpressionKind,
453
454    /// Span of the whole expression in the input.
455    pub span: Range<usize>,
456}