boreal_parser/expression/mod.rs
//! Types related to the condition part of YARA rules.
2use std::ops::Range;
3
4mod boolean_expression;
5mod common;
6mod for_expression;
7mod identifier;
8mod primary_expression;
9mod read_integer;
10mod string_expression;
11
12use crate::regex::Regex;
13
14pub(crate) use boolean_expression::boolean_expression as expression;
15
/// Integer read type, see [`ExpressionKind::ReadInteger`].
///
/// Each variant describes the size, signedness and (for the `BE` variants)
/// the byte order of the integer to read from the input.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ReadIntegerType {
    /// 8 bits, signed
    Int8,
    /// 8 bits, unsigned
    Uint8,
    /// 16 bits, signed
    Int16,
    /// 16 bits, signed, big-endian
    Int16BE,
    /// 16 bits, unsigned
    Uint16,
    /// 16 bits, unsigned, big-endian
    Uint16BE,
    /// 32 bits, signed
    Int32,
    /// 32 bits, signed, big-endian
    Int32BE,
    /// 32 bits, unsigned
    Uint32,
    /// 32 bits, unsigned, big-endian
    Uint32BE,
}

41/// Parsed identifier used in expressions.
42#[derive(Clone, Debug, PartialEq)]
43pub struct Identifier {
44 /// Name of the identifier
45 pub name: String,
46
47 /// Span covering the name of the identifier.
48 pub name_span: Range<usize>,
49
50 /// Operations on the identifier, stored in the order of operations.
51 ///
52 /// For example, `pe.sections[2].name` would give `pe` for the name, and
53 /// `[Subfield("sections"), Subscript(Expr::Integer(2)), Subfield("name")]` for the operations.
54 pub operations: Vec<IdentifierOperation>,
55}
56
57/// Operation applied on an identifier.
58#[derive(Clone, Debug, PartialEq)]
59pub struct IdentifierOperation {
60 /// Type of the operation
61 pub op: IdentifierOperationType,
62
63 /// Span covering the operation
64 pub span: Range<usize>,
65}
66
67/// Type of operation applied on an identifier.
68#[derive(Clone, Debug, PartialEq)]
69pub enum IdentifierOperationType {
70 /// Array subscript, i.e. `identifier[subscript]`.
71 Subscript(Box<Expression>),
72 /// Object subfield, i.e. `identifier.subfield`.
73 Subfield(String),
74 /// Function call, i.e. `identifier(arguments)`.
75 FunctionCall(Vec<Expression>),
76}
77
78/// An expression parsed in a Rule.
79#[derive(Clone, Debug, PartialEq)]
80pub enum ExpressionKind {
81 /// Size of the file being scanned.
82 Filesize,
83
84 /// Entrypoint of the file being scanned, if it is a PE or ELF.
85 ///
86 /// Deprecated, use the `pe` or `elf` module instead.
87 Entrypoint,
88
89 /// An integer read at a given address.
90 ///
91 /// See the yara documentation on `int8`, `uint16be` etc.
92 ReadInteger {
93 /// Which size and endianness to read.
94 ty: ReadIntegerType,
95 /// Address/Offset of the input where to read.
96 addr: Box<Expression>,
97 },
98
99 /// A i64 value.
100 Integer(i64),
101
102 /// A f64 floating-point value.
103 Double(f64),
104
105 /// Count number of matches on a given variable.
106 Count(String),
107
108 /// Count number of matches on a given variable in a specific range of the input.
109 CountInRange {
110 /// Name of the variable being counted
111 variable_name: String,
112 /// Span for the name of the variable
113 variable_name_span: Range<usize>,
114 /// Starting offset, included.
115 from: Box<Expression>,
116 /// Ending offset, included.
117 to: Box<Expression>,
118 },
119
120 /// Offset of a variable match
121 Offset {
122 /// Name of the variable
123 variable_name: String,
124
125 /// Occurrence number.
126 ///
127 /// `1` is the first match on the variable, `2` is the next one, etc.
128 occurence_number: Box<Expression>,
129 },
130
131 /// Length of a variable match
132 Length {
133 /// Name of the variable
134 variable_name: String,
135
136 /// Occurrence number.
137 ///
138 /// `1` is the first match on the variable, `2` is the next one, etc.
139 occurence_number: Box<Expression>,
140 },
141
142 /// Opposite value, for integers and floats.
143 Neg(Box<Expression>),
144
145 /// Addition, for integers and floats.
146 Add(Box<Expression>, Box<Expression>),
147 /// Substraction, for integers and floats.
148 Sub(Box<Expression>, Box<Expression>),
149 /// Multiplication, for integers and floats.
150 Mul(Box<Expression>, Box<Expression>),
151 /// Division, for integers and floats.
152 Div(Box<Expression>, Box<Expression>),
153
154 /// Modulo, for integers.
155 Mod(Box<Expression>, Box<Expression>),
156
157 /// Bitwise xor, for integers.
158 BitwiseXor(Box<Expression>, Box<Expression>),
159 /// Bitwise and, for integers.
160 BitwiseAnd(Box<Expression>, Box<Expression>),
161 /// Bitwise or, for integers.
162 BitwiseOr(Box<Expression>, Box<Expression>),
163
164 /// Bitwise negation, for integers.
165 BitwiseNot(Box<Expression>),
166
167 /// Shift left, both elements must be integers.
168 ShiftLeft(Box<Expression>, Box<Expression>),
169 /// Shift right, both elements must be integers.
170 ShiftRight(Box<Expression>, Box<Expression>),
171
172 /// Boolean and operation.
173 And(Vec<Expression>),
174 /// Boolean or operation.
175 Or(Vec<Expression>),
176
177 /// Boolean negation.
178 Not(Box<Expression>),
179
180 /// Comparison.
181 ///
182 /// Integers and floats can be compared to integers and floats.
183 /// Strings can be compared to strings.
184 Cmp {
185 /// Left operand.
186 left: Box<Expression>,
187 /// Right operand.
188 right: Box<Expression>,
189 /// If true this is '<', otherwise '>'
190 less_than: bool,
191 /// If true, left == right returns true.
192 can_be_equal: bool,
193 },
194
195 /// Equal
196 Eq(Box<Expression>, Box<Expression>),
197
198 /// Not equal
199 NotEq(Box<Expression>, Box<Expression>),
200
201 /// Does a string contains another string
202 Contains {
203 /// String to search in
204 haystack: Box<Expression>,
205 /// String to search
206 needle: Box<Expression>,
207 /// If true, the search is case insensitive.
208 case_insensitive: bool,
209 },
210
211 /// Does a string starts with another string
212 StartsWith {
213 /// String to search in
214 expr: Box<Expression>,
215 /// Prefix to search
216 prefix: Box<Expression>,
217 /// If true, the search is case insensitive.
218 case_insensitive: bool,
219 },
220
221 /// Does a string ends with another string
222 EndsWith {
223 /// String to search in
224 expr: Box<Expression>,
225 /// Prefix to search
226 suffix: Box<Expression>,
227 /// If true, the search is case insensitive.
228 case_insensitive: bool,
229 },
230
231 /// Case insensitive equality test. Both elements must be strings.
232 IEquals(Box<Expression>, Box<Expression>),
233
234 /// Does a string matches a regex.
235 Matches(Box<Expression>, Regex),
236
237 /// Is a given value defined.
238 ///
239 /// For example, `defined filesize` will be true when scanning a file,
240 /// false otherwise.
241 Defined(Box<Expression>),
242
243 /// A boolean value.
244 Boolean(bool),
245
246 /// Does a variable matches
247 Variable(String),
248
249 /// Does a variable matches at a given offset.
250 VariableAt {
251 /// Name of the variable
252 variable_name: String,
253 /// Span for the name of the variable
254 variable_name_span: Range<usize>,
255 /// Offset
256 offset: Box<Expression>,
257 },
258
259 /// Does a variable matches in a given offset range.
260 VariableIn {
261 /// Name of the variable.
262 variable_name: String,
263 /// Span for the name of the variable
264 variable_name_span: Range<usize>,
265 /// Starting offset, included.
266 from: Box<Expression>,
267 /// Ending offset, included.
268 to: Box<Expression>,
269 },
270
271 /// Evaluate multiple variables on a given expression.
272 ///
273 /// For each variable in `set`, evaluate `body`.
274 /// Then, if the number of evaluations returning true
275 /// matches the `selection`, then this expression returns true.
276 For {
277 /// How many variables must match for this expression to be true.
278 selection: ForSelection,
279
280 /// Which variables to select.
281 set: VariableSet,
282
283 /// `ParsedExpr` to evaluate for each variable.
284 ///
285 /// The body can contain `$`, `#`, `@` or `!` to refer to the
286 /// currently selected variable.
287 ///
288 /// If unset, this is equivalent to `$`, i.e. true if the selected
289 /// variable matches.
290 body: Option<Box<Expression>>,
291 },
292
293 /// Evaluate the presence of multiple variables in a given range.
294 ///
295 /// This is equivalent to a [`Self::For`] value, with a body
296 /// set to `$ in (from..to)`.
297 ForIn {
298 /// How many variables must match for this expresion to be true.
299 selection: ForSelection,
300 /// Which variables to select.
301 set: VariableSet,
302 /// Starting offset, included.
303 from: Box<Expression>,
304 /// Ending offset, included.
305 to: Box<Expression>,
306 },
307
308 /// Evaluate the presence of multiple variables at a given offset.
309 ///
310 /// This is equivalent to a [`Self::For`] value, with a body
311 /// set to `$ at expr`.
312 ForAt {
313 /// How many variables must match for this expresion to be true.
314 selection: ForSelection,
315 /// Which variables to select.
316 set: VariableSet,
317 /// Offset of the variable match.
318 offset: Box<Expression>,
319 },
320
321 /// Evaluate an identifier with multiple values on a given expression.
322 ///
323 /// Same as [`Self::For`], but instead of binding a variable,
324 /// an identifier is bounded to multiple values.
325 ///
326 /// For example: `for all i in (0..#a): ( @a[i] < 100 )`
327 ForIdentifiers {
328 /// How many times the body must evaluate to true for this expresion
329 /// to be true.
330 selection: ForSelection,
331
332 /// List of identifiers to bind.
333 ///
334 /// This is a list because the values bounded can be complex, ie
335 /// arrays or dictionaries. This list is the same length as the
336 /// cardinality of the values in the iterator.
337 identifiers: Vec<String>,
338
339 /// Span covering the identifiers declaration
340 identifiers_span: Range<usize>,
341
342 /// Values to bind to the identifiers.
343 iterator: ForIterator,
344
345 /// Span covering the iterator
346 iterator_span: Range<usize>,
347
348 /// Body to evaluate for each binding.
349 body: Box<Expression>,
350 },
351
352 /// Depend on multiple rules already declared in the namespace.
353 ///
354 /// If the number of matching rules in the set matches the `selection`,
355 /// this expression returns true.
356 ForRules {
357 /// How many variables must match for this expression to be true.
358 selection: ForSelection,
359
360 /// Which rules are selected.
361 set: RuleSet,
362 },
363
364 /// An identifier.
365 Identifier(Identifier),
366 /// A byte string.
367 Bytes(Vec<u8>),
368 /// A regex.
369 Regex(Regex),
370}
371
372/// Selection of variables in a 'for' expression.
373///
374/// This indicates how many variables must match the for condition
375/// for it to be considered true.
376#[derive(Clone, Debug, PartialEq)]
377pub enum ForSelection {
378 /// Any variable in the set must match the condition.
379 Any,
380 /// All of the variables in the set must match the condition.
381 All,
382 /// None of the variables in the set must match the condition.
383 None,
384 /// `ParsedExpr` that should evaluate to a number, indicating:
385 /// - if `as_percent` is false, how many variables in the set must match
386 /// the condition.
387 /// - if `as_percent` is true, which percentage of variables in the set
388 /// msut match the condition.
389 /// the condition.
390 ///
391 /// Usually, the expression is a simple number.
392 Expr {
393 /// Number of variables selected
394 expr: Box<Expression>,
395 /// Should the number be a percentage.
396 as_percent: bool,
397 },
398}
399
400/// Iterator for a 'for' expression over an identifier.
401#[derive(Clone, Debug, PartialEq)]
402pub enum ForIterator {
403 /// Identifier to pick values from.
404 Identifier(Identifier),
405 /// Every value between two numbers
406 Range {
407 /// Start of the range, included
408 from: Box<Expression>,
409 /// End of the range, included
410 to: Box<Expression>,
411 },
412 /// List of values
413 List(Vec<Expression>),
414}
415
416/// Set of multiple variables.
417#[derive(Clone, Debug, PartialEq, Eq)]
418pub struct VariableSet {
419 /// Names of the variables in the set.
420 ///
421 /// If empty, the set is considered as containing *all* variables.
422 pub elements: Vec<SetElement>,
423}
424
/// Element of a set.
///
/// An element is a name, optionally followed by a trailing wildcard.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SetElement {
    /// Name of the element.
    pub name: String,

    /// Is the name a wildcard, i.e. the element is `name*`.
    pub is_wildcard: bool,

    /// Span for the element.
    pub span: Range<usize>,
}

438/// Set of multiple rules.
439#[derive(Clone, Debug, PartialEq, Eq)]
440pub struct RuleSet {
441 /// Names of the rules in the set.
442 ///
443 /// The associated boolean indicates if the name has a trailing
444 /// wildcard.
445 pub elements: Vec<SetElement>,
446}
447
448/// A parsed expression with associated span
449#[derive(Clone, Debug, PartialEq)]
450pub struct Expression {
451 /// Kind of the expression.
452 pub expr: ExpressionKind,
453
454 /// Span of the whole expression in the input.
455 pub span: Range<usize>,
456}