// yara_x_parser/ast/mod.rs

/*! Abstract Syntax Tree (AST) for YARA rules.

Each structure or enum in this module corresponds to some construct in the YARA
language, like a rule, expression, identifier, import statement, etc.

*/
7
8use std::borrow::Cow;
9use std::fmt;
10use std::fmt::{Debug, Display, Formatter};
11use std::slice::Iter;
12
13use ::ascii_tree::write_tree;
14use bitflags::bitflags;
15use bstr::{BStr, BString, ByteSlice, Utf8Error};
16
17use crate::ast::cst2ast::Builder;
18use crate::cst::SyntaxKind::{
19    ASCII_KW, BASE64_KW, BASE64WIDE_KW, FULLWORD_KW, NOCASE_KW, WIDE_KW,
20    XOR_KW,
21};
22use crate::cst::{CSTStream, Event};
23use crate::{Parser, Span};
24
25mod ascii_tree;
26mod cst2ast;
27mod errors;
28#[cfg(test)]
29mod tests;
30
31pub mod dfs;
32
33pub use errors::Error;
34
35/// Abstract Syntax Tree (AST) for YARA rules.
36pub struct AST<'src> {
37    /// The list of items in the AST (imports, includes, and rules).
38    pub items: Vec<Item<'src>>,
39    /// Errors that occurred while parsing the rules.
40    errors: Vec<Error>,
41}
42
43/// Top level items in the AST.
44pub enum Item<'src> {
45    Import(Import<'src>),
46    Include(Include<'src>),
47    Rule(Rule<'src>),
48}
49
50impl<'src> From<&'src str> for AST<'src> {
51    /// Creates an [`AST`] from the give source code.
52    #[inline]
53    fn from(src: &'src str) -> Self {
54        AST::from(src.as_bytes())
55    }
56}
57
58impl<'src> From<&'src [u8]> for AST<'src> {
59    /// Creates an [`AST`] from the give source code.
60    #[inline]
61    fn from(src: &'src [u8]) -> Self {
62        AST::from(Parser::new(src))
63    }
64}
65
66impl<'src> From<Parser<'src>> for AST<'src> {
67    /// Creates an [`AST`] from the given [`Parser`].
68    fn from(parser: Parser<'src>) -> Self {
69        AST::new(parser.source(), parser)
70    }
71}
72
73impl<'src, I> From<CSTStream<'src, I>> for AST<'src>
74where
75    I: Iterator<Item = Event>,
76{
77    /// Creates an [`AST`] from the given [`CSTStream`].
78    fn from(cst: CSTStream<'src, I>) -> Self {
79        AST::new(cst.source(), cst)
80    }
81}
82
83impl<'src> AST<'src> {
84    /// Creates a new AST from YARA source code and an iterator of [`Event`]
85    /// items representing the parsed structure of that code.
86    ///
87    /// # Panics
88    ///
89    /// This is a low-level API that requires the `events` iterator to perfectly
90    /// match the provided source code. This function will panic if the events
91    /// are inconsistent with the source or do not originate from parsing this
92    /// specific code.
93    #[doc(hidden)]
94    pub fn new<I: Iterator<Item = Event>>(
95        src: &'src [u8],
96        events: I,
97    ) -> AST<'src> {
98        Builder::new(src, events).build_ast()
99    }
100
101    /// Returns the top level items in the AST.
102    ///
103    /// A top level item can be an import, include, or rule.
104    #[inline]
105    pub fn items(&self) -> impl Iterator<Item = &Item<'src>> {
106        self.items.iter()
107    }
108
109    /// Returns the import statements in the AST.
110    pub fn imports(&self) -> impl Iterator<Item = &Import<'src>> {
111        self.items.iter().filter_map(|item| {
112            if let Item::Import(import) = item { Some(import) } else { None }
113        })
114    }
115
116    /// Returns the rules in the AST.
117    pub fn rules(&self) -> impl Iterator<Item = &Rule<'src>> {
118        self.items.iter().filter_map(|item| {
119            if let Item::Rule(rule) = item { Some(rule) } else { None }
120        })
121    }
122
123    /// Returns the errors found while parsing the source code.
124    #[inline]
125    pub fn errors(&self) -> &[Error] {
126        self.errors.as_slice()
127    }
128
129    /// Consumes the parser, and returns the errors found while
130    /// parsing the source code as a vector.
131    #[inline]
132    pub fn into_errors(self) -> Vec<Error> {
133        self.errors
134    }
135}
136
137impl Debug for AST<'_> {
138    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
139        for rule in self.rules() {
140            write_tree(f, &ascii_tree::rule_ascii_tree(rule))?;
141            writeln!(f)?;
142        }
143
144        if !self.errors.is_empty() {
145            writeln!(f, "ERRORS:")?;
146            for err in &self.errors {
147                writeln!(f, "- {err:?}")?;
148            }
149        }
150
151        Ok(())
152    }
153}
154
155/// An import statement.
156#[derive(Debug)]
157pub struct Import<'src> {
158    span: Span,
159    pub module_name: &'src str,
160}
161
162/// An include statement.
163#[derive(Debug)]
164pub struct Include<'src> {
165    span: Span,
166    pub file_name: &'src str,
167}
168
169/// A YARA rule.
170#[derive(Debug)]
171pub struct Rule<'src> {
172    pub flags: RuleFlags,
173    pub identifier: Ident<'src>,
174    pub tags: Option<Vec<Ident<'src>>>,
175    pub meta: Option<Vec<Meta<'src>>>,
176    pub patterns: Option<Vec<Pattern<'src>>>,
177    pub condition: Expr<'src>,
178}
179
180bitflags! {
181    /// A set of flags associated to a YARA rule.
182    #[derive(Debug)]
183    pub struct RuleFlags: u8 {
184        const Private = 0x01;
185        const Global = 0x02;
186    }
187}
188
189/// A metadata entry in a YARA rule.
190#[derive(Debug)]
191pub struct Meta<'src> {
192    pub identifier: Ident<'src>,
193    pub value: MetaValue<'src>,
194}
195
196/// Each of the possible values that can have a metadata entry.
197#[derive(Debug)]
198pub enum MetaValue<'src> {
199    Bool((bool, Span)),
200    Integer((i64, Span)),
201    Float((f64, Span)),
202    String((&'src str, Span)),
203    Bytes((BString, Span)),
204}
205
206impl Display for MetaValue<'_> {
207    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
208        match self {
209            Self::Bool((v, _)) => write!(f, "{v}"),
210            Self::Integer((v, _)) => write!(f, "{v}"),
211            Self::Float((v, _)) => write!(f, "{v:.1}"),
212            Self::String((v, _)) => write!(f, "\"{v}\""),
213            Self::Bytes((v, _)) => write!(f, "\"{v}\""),
214        }
215    }
216}
217
218/// An identifier (e.g. `some_ident`).
219#[derive(Debug, Clone, Default)]
220pub struct Ident<'src> {
221    span: Span,
222    #[doc(hidden)]
223    pub name: &'src str,
224}
225
226impl<'src> Ident<'src> {
227    #[doc(hidden)]
228    pub fn new(name: &'src str) -> Self {
229        Self { name, span: Default::default() }
230    }
231
232    pub fn starts_with(&self, pat: &str) -> bool {
233        self.name.starts_with(pat)
234    }
235}
236
237/// An expression where an identifier can be accompanied by a range
238/// (e.g. `#a in <range>`).
239///
240/// The range is optional thought, so expressions like `#a` are also
241/// represented by this struct.
242#[derive(Debug)]
243pub struct IdentWithRange<'src> {
244    span: Span,
245    pub identifier: Ident<'src>,
246    pub range: Option<Range<'src>>,
247}
248
249/// An expression where an identifier can be accompanied by an index
250/// (e.g. `@a[2]`).
251///
252/// The index is optional thought, so expressions like `@a` are also
253/// represented by this struct.
254#[derive(Debug)]
255pub struct IdentWithIndex<'src> {
256    span: Span,
257    pub identifier: Ident<'src>,
258    pub index: Option<Expr<'src>>,
259}
260
261/// Types of patterns (a.k.a. strings) that can appear in a YARA rule.
262///
263/// Possible types are: text patterns, hex patterns and regular expressions.
264#[derive(Debug)]
265pub enum Pattern<'src> {
266    Text(Box<TextPattern<'src>>),
267    Hex(Box<HexPattern<'src>>),
268    Regexp(Box<RegexpPattern<'src>>),
269}
270
271impl<'src> Pattern<'src> {
272    pub fn identifier(&self) -> &Ident<'src> {
273        match self {
274            Pattern::Text(p) => &p.identifier,
275            Pattern::Regexp(p) => &p.identifier,
276            Pattern::Hex(p) => &p.identifier,
277        }
278    }
279
280    pub fn modifiers(&self) -> &PatternModifiers<'src> {
281        match self {
282            Pattern::Text(p) => &p.modifiers,
283            Pattern::Hex(p) => &p.modifiers,
284            Pattern::Regexp(p) => &p.modifiers,
285        }
286    }
287}
288
289/// A text pattern (a.k.a. text string) in a YARA rule.
290#[derive(Debug)]
291pub struct TextPattern<'src> {
292    pub identifier: Ident<'src>,
293    pub text: LiteralString<'src>,
294    pub modifiers: PatternModifiers<'src>,
295}
296
297/// A regular expression pattern in a YARA rule.
298#[derive(Debug)]
299pub struct RegexpPattern<'src> {
300    pub identifier: Ident<'src>,
301    pub regexp: Regexp<'src>,
302    pub modifiers: PatternModifiers<'src>,
303}
304
305/// A hex pattern (a.k.a. hex string) in a YARA rule.
306#[derive(Debug, Default)]
307pub struct HexPattern<'src> {
308    span: Span,
309    pub identifier: Ident<'src>,
310    pub sub_patterns: HexSubPattern,
311    pub modifiers: PatternModifiers<'src>,
312}
313
314impl<'src> HexPattern<'src> {
315    #[doc(hidden)]
316    pub fn new(ident: &'src str) -> Self {
317        Self {
318            identifier: Ident::new(ident),
319            span: Span::default(),
320            ..Default::default()
321        }
322    }
323}
324
325/// A sequence of tokens that conform a hex pattern (a.k.a. hex string).
326#[derive(Debug, Default)]
327pub struct HexSubPattern(pub Vec<HexToken>);
328
329impl HexSubPattern {
330    #[inline]
331    pub fn iter(&self) -> impl Iterator<Item = &HexToken> {
332        self.0.iter()
333    }
334
335    #[inline]
336    pub fn len(&self) -> usize {
337        self.0.len()
338    }
339
340    #[inline]
341    pub fn is_empty(&self) -> bool {
342        self.0.is_empty()
343    }
344}
345
346/// Each of the types of tokens in a hex pattern (a.k.a. hex string).
347///
348/// A token can be a single byte, a negated byte (e.g. `~XX`), an
349/// alternative (e.g `(XXXX|YYYY)`), or a jump (e.g `[0-10]`).
350#[derive(Debug)]
351pub enum HexToken {
352    Byte(HexByte),
353    NotByte(HexByte),
354    Alternative(Box<HexAlternative>),
355    Jump(HexJump),
356}
357
358/// A single byte in a hex pattern (a.k.a. hex string).
359///
360/// The byte's value is accompanied by a mask that indicates which bits in the
361/// value are taken into account during matching, and which are ignored. A bit
362/// set to 1 in the mask indicates that the corresponding bit in the value is
363/// taken into account, while a bit set to 0 indicates that the corresponding
364/// bit in the value is ignored. Ignored bits are always set to 0 in the value.
365///
366/// For example, for pattern `A?` the value is `A0` and the mask is `F0`, and
367/// for pattern `?1` the value is `01` and the mask is `0F`.
368#[derive(Clone, Debug, PartialEq, Eq)]
369pub struct HexByte {
370    span: Span,
371    pub value: u8,
372    pub mask: u8,
373}
374
375impl HexByte {
376    #[doc(hidden)]
377    pub fn new(value: u8, mask: u8) -> Self {
378        Self { value, mask, span: Span::default() }
379    }
380}
381
382/// An alternative in a hex pattern (a.k.a. hex string).
383///
384/// Alternatives are sequences of hex sub-patterns separated by `|`.
385#[derive(Debug, Default)]
386pub struct HexAlternative {
387    span: Span,
388    pub alternatives: Vec<HexSubPattern>,
389}
390
391impl HexAlternative {
392    #[doc(hidden)]
393    pub fn new(alternatives: Vec<HexSubPattern>) -> Self {
394        Self { alternatives, span: Span::default() }
395    }
396}
397
398/// A jump in a hex pattern (a.k.a. hex string).
399#[derive(Debug, Clone, Default)]
400pub struct HexJump {
401    span: Span,
402    pub start: Option<u32>,
403    pub end: Option<u32>,
404}
405
406impl HexJump {
407    #[doc(hidden)]
408    pub fn new(start: Option<u32>, end: Option<u32>) -> Self {
409        Self { start, end, span: Span::default() }
410    }
411}
412
413impl Display for HexJump {
414    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
415        match (self.start, self.end) {
416            (Some(start), Some(end)) => write!(f, "[{start}-{end}]"),
417            (Some(start), None) => write!(f, "[{start}-]"),
418            (None, Some(end)) => write!(f, "[-{end}]"),
419            (None, None) => write!(f, "[-]"),
420        }
421    }
422}
423
424/// An `of` expression (e.g. `1 of ($a, $b)`, `all of them`,
425/// `any of (true, false)`)
426#[derive(Debug)]
427pub struct Of<'src> {
428    span: Span,
429    pub quantifier: Quantifier<'src>,
430    pub items: OfItems<'src>,
431    pub anchor: Option<MatchAnchor<'src>>,
432}
433
434/// A `for .. of` expression (e.g `for all of them : (..)`,
435/// `for 1 of ($a,$b) : (..)`)
436#[derive(Debug)]
437pub struct ForOf<'src> {
438    span: Span,
439    pub quantifier: Quantifier<'src>,
440    pub pattern_set: PatternSet<'src>,
441    pub body: Expr<'src>,
442}
443
444/// A `for .. in` expression (e.g `for all x in iterator : (..)`)
445#[derive(Debug)]
446pub struct ForIn<'src> {
447    span: Span,
448    pub quantifier: Quantifier<'src>,
449    pub variables: Vec<Ident<'src>>,
450    pub iterable: Iterable<'src>,
451    pub body: Expr<'src>,
452}
453
454/// Items in a `of` expression.
455#[derive(Debug)]
456pub enum OfItems<'src> {
457    PatternSet(PatternSet<'src>),
458    BoolExprTuple(Vec<Expr<'src>>),
459}
460
461/// A `with` expression (e.g `with foo = 1 + 1 : (..)`)
462#[derive(Debug)]
463pub struct With<'src> {
464    span: Span,
465    pub declarations: Vec<WithDeclaration<'src>>,
466    pub body: Expr<'src>,
467}
468
469/// Items in a `with` expression.
470#[derive(Debug)]
471pub struct WithDeclaration<'src> {
472    span: Span,
473    pub identifier: Ident<'src>,
474    pub expression: Expr<'src>,
475}
476
477/// A quantifier used in `for` and `of` expressions.
478#[derive(Debug)]
479pub enum Quantifier<'src> {
480    None {
481        span: Span,
482    },
483    All {
484        span: Span,
485    },
486    Any {
487        span: Span,
488    },
489    /// Used in expressions like `10% of them`.
490    Percentage(Expr<'src>),
491    /// Used in expressions like `10 of them`.
492    Expr(Expr<'src>),
493}
494
495/// Possible iterable expressions that can use in a [`ForIn`].
496#[derive(Debug)]
497pub enum Iterable<'src> {
498    Range(Range<'src>),
499    ExprTuple(Vec<Expr<'src>>),
500    Expr(Expr<'src>),
501}
502
503/// Either a set of pattern identifiers (possibly with wildcards), or the
504/// special set `them`, which includes all the patterns declared in the rule.
505#[derive(Debug)]
506pub enum PatternSet<'src> {
507    Them { span: Span },
508    Set(Vec<PatternSetItem<'src>>),
509}
510
511/// Each individual item in a set of patterns.
512///
513/// In the pattern set `($a, $b*)`, `$a` and `$b*` are represented by a
514/// [`PatternSetItem`].
515#[derive(Debug)]
516pub struct PatternSetItem<'src> {
517    span: Span,
518    pub identifier: &'src str,
519    pub wildcard: bool,
520}
521
522impl PatternSetItem<'_> {
523    /// Returns true if `ident` matches this [`PatternSetItem`].
524    ///
525    /// For example, identifiers `$a` and `$abc` both match the
526    /// [`PatternSetItem`] for `$a*`.
527    pub fn matches(&self, identifier: &Ident) -> bool {
528        if self.wildcard {
529            identifier.name.starts_with(self.identifier)
530        } else {
531            identifier.name == self.identifier
532        }
533    }
534}
535
536/// An expression in the AST.
537#[derive(Debug)]
538pub enum Expr<'src> {
539    True {
540        span: Span,
541    },
542
543    False {
544        span: Span,
545    },
546
547    Filesize {
548        span: Span,
549    },
550
551    Entrypoint {
552        span: Span,
553    },
554
555    /// A literal string, (e.g: `"abcd"`)
556    LiteralString(Box<LiteralString<'src>>),
557
558    /// A literal integer, (e.g: `1`, `0xAB`)
559    LiteralInteger(Box<LiteralInteger<'src>>),
560
561    /// A literal float, (e.g: `2.0`, `3.14`)
562    LiteralFloat(Box<LiteralFloat<'src>>),
563
564    /// A regular expression (e.g: `/ab.*cd/i`)
565    Regexp(Box<Regexp<'src>>),
566
567    /// Identifier (e.g. `some_identifier`).
568    Ident(Box<Ident<'src>>),
569
570    /// Pattern match expression (e.g. `$`, `$a`, `$a at 0`, `$a in (0..10)`)
571    PatternMatch(Box<PatternMatch<'src>>),
572
573    /// Pattern count expression (e.g. `#`, `#a`, `#a in (0..10)`)
574    PatternCount(Box<IdentWithRange<'src>>),
575
576    /// Pattern offset expression (e.g. `@` `@a`, `@a[1]`)
577    PatternOffset(Box<IdentWithIndex<'src>>),
578
579    /// Pattern length expression (e.g. `!`, `!a`, `!a[1]`)
580    PatternLength(Box<IdentWithIndex<'src>>),
581
582    /// Array or dictionary lookup expression (e.g. `array[1]`, `dict["key"]`)
583    Lookup(Box<Lookup<'src>>),
584
585    /// A field lookup expression (e.g. `foo.bar`)
586    FieldAccess(Box<NAryExpr<'src>>),
587
588    /// A function call expression (e.g. `foo()`, `bar(1,2)`)
589    FuncCall(Box<FuncCall<'src>>),
590
591    /// A `defined` expression (e.g. `defined foo`)
592    Defined(Box<UnaryExpr<'src>>),
593
594    /// Boolean `not` expression.
595    Not(Box<UnaryExpr<'src>>),
596
597    /// Boolean `and` expression.
598    And(Box<NAryExpr<'src>>),
599
600    /// Boolean `or` expression.
601    Or(Box<NAryExpr<'src>>),
602
603    /// Arithmetic minus.
604    Minus(Box<UnaryExpr<'src>>),
605
606    /// Arithmetic add (`+`) expression.
607    Add(Box<NAryExpr<'src>>),
608
609    /// Arithmetic subtraction (`-`) expression.
610    Sub(Box<NAryExpr<'src>>),
611
612    /// Arithmetic multiplication (`*`) expression.
613    Mul(Box<NAryExpr<'src>>),
614
615    /// Arithmetic division (`\`) expression.
616    Div(Box<NAryExpr<'src>>),
617
618    /// Arithmetic modulus (`%`) expression.
619    Mod(Box<NAryExpr<'src>>),
620
621    /// Bitwise not (`~`) expression.
622    BitwiseNot(Box<UnaryExpr<'src>>),
623
624    /// Bitwise shift left (`<<`) expression.
625    Shl(Box<BinaryExpr<'src>>),
626
627    /// Bitwise shift right (`>>`) expression.
628    Shr(Box<BinaryExpr<'src>>),
629
630    /// Bitwise and (`&`) expression.
631    BitwiseAnd(Box<BinaryExpr<'src>>),
632
633    /// Bitwise or (`|`) expression.
634    BitwiseOr(Box<BinaryExpr<'src>>),
635
636    /// Bitwise xor (`^`) expression.
637    BitwiseXor(Box<BinaryExpr<'src>>),
638
639    /// Equal (`==`) expression.
640    Eq(Box<BinaryExpr<'src>>),
641
642    /// Not equal (`!=`) expression.
643    Ne(Box<BinaryExpr<'src>>),
644
645    /// Less than (`<`) expression.
646    Lt(Box<BinaryExpr<'src>>),
647
648    /// Greater than (`>`) expression.
649    Gt(Box<BinaryExpr<'src>>),
650
651    /// Less or equal (`<=`) expression.
652    Le(Box<BinaryExpr<'src>>),
653
654    /// Greater or equal (`>=`) expression.
655    Ge(Box<BinaryExpr<'src>>),
656
657    /// `contains` expression.
658    Contains(Box<BinaryExpr<'src>>),
659
660    /// `icontains` expression
661    IContains(Box<BinaryExpr<'src>>),
662
663    /// `startswith` expression.
664    StartsWith(Box<BinaryExpr<'src>>),
665
666    /// `istartswith` expression
667    IStartsWith(Box<BinaryExpr<'src>>),
668
669    /// `endswith` expression.
670    EndsWith(Box<BinaryExpr<'src>>),
671
672    /// `iendswith` expression
673    IEndsWith(Box<BinaryExpr<'src>>),
674
675    /// `iequals` expression.
676    IEquals(Box<BinaryExpr<'src>>),
677
678    /// `matches` expression.
679    Matches(Box<BinaryExpr<'src>>),
680
681    /// An `of` expression (e.g. `1 of ($a, $b)`, `all of them`)
682    Of(Box<Of<'src>>),
683
684    /// A `for <quantifier> of ...` expression. (e.g. `for any of ($a, $b) : ( ... )`)
685    ForOf(Box<ForOf<'src>>),
686
687    /// A `for <quantifier> <vars> in ...` expression. (e.g. `for all i in (1..100) : ( ... )`)
688    ForIn(Box<ForIn<'src>>),
689
690    /// A `with` expression (e.g. `with foo = 1 + 1 : ( ... )`)
691    With(Box<With<'src>>),
692}
693
694/// A set of modifiers associated to a pattern.
695#[derive(Debug, Default)]
696pub struct PatternModifiers<'src> {
697    modifiers: Vec<PatternModifier<'src>>,
698}
699
700impl<'src> PatternModifiers<'src> {
701    pub(crate) fn new(modifiers: Vec<PatternModifier<'src>>) -> Self {
702        Self { modifiers }
703    }
704
705    /// Returns an iterator for all the modifiers associated to the pattern.
706    #[inline]
707    pub fn iter(&self) -> PatternModifiersIter<'_> {
708        PatternModifiersIter { iter: self.modifiers.iter() }
709    }
710
711    /// Returns true if the pattern has no modifiers.
712    #[inline]
713    pub fn is_empty(&self) -> bool {
714        self.modifiers.is_empty()
715    }
716
717    #[inline]
718    pub fn ascii(&self) -> Option<&PatternModifier<'src>> {
719        self.modifiers
720            .iter()
721            .find(|m| matches!(m, PatternModifier::Ascii { .. }))
722    }
723
724    #[inline]
725    pub fn wide(&self) -> Option<&PatternModifier<'src>> {
726        self.modifiers
727            .iter()
728            .find(|m| matches!(m, PatternModifier::Wide { .. }))
729    }
730
731    #[inline]
732    pub fn base64(&self) -> Option<&PatternModifier<'src>> {
733        self.modifiers
734            .iter()
735            .find(|m| matches!(m, PatternModifier::Base64 { .. }))
736    }
737
738    #[inline]
739    pub fn base64wide(&self) -> Option<&PatternModifier<'src>> {
740        self.modifiers
741            .iter()
742            .find(|m| matches!(m, PatternModifier::Base64Wide { .. }))
743    }
744
745    #[inline]
746    pub fn fullword(&self) -> Option<&PatternModifier<'src>> {
747        self.modifiers
748            .iter()
749            .find(|m| matches!(m, PatternModifier::Fullword { .. }))
750    }
751
752    #[inline]
753    pub fn nocase(&self) -> Option<&PatternModifier<'src>> {
754        self.modifiers
755            .iter()
756            .find(|m| matches!(m, PatternModifier::Nocase { .. }))
757    }
758
759    #[inline]
760    pub fn private(&self) -> Option<&PatternModifier<'src>> {
761        self.modifiers
762            .iter()
763            .find(|m| matches!(m, PatternModifier::Private { .. }))
764    }
765
766    #[inline]
767    pub fn xor(&self) -> Option<&PatternModifier<'src>> {
768        self.modifiers
769            .iter()
770            .find(|m| matches!(m, PatternModifier::Xor { .. }))
771    }
772}
773
774/// Iterator that returns all the modifiers in a [`PatternModifiers`].
775///
776/// This is the result of [`PatternModifiers::iter`].
777pub struct PatternModifiersIter<'src> {
778    iter: Iter<'src, PatternModifier<'src>>,
779}
780
781impl<'src> Iterator for PatternModifiersIter<'src> {
782    type Item = &'src PatternModifier<'src>;
783
784    fn next(&mut self) -> Option<Self::Item> {
785        self.iter.next()
786    }
787}
788
789/// A pattern (a.k.a. string) modifier.
790#[derive(Debug)]
791pub enum PatternModifier<'src> {
792    Ascii { span: Span },
793    Wide { span: Span },
794    Nocase { span: Span },
795    Private { span: Span },
796    Fullword { span: Span },
797    Base64 { span: Span, alphabet: Option<LiteralString<'src>> },
798    Base64Wide { span: Span, alphabet: Option<LiteralString<'src>> },
799    Xor { span: Span, start: u8, end: u8 },
800}
801
802impl PatternModifier<'_> {
803    pub fn as_text(&self) -> &'static str {
804        match self {
805            PatternModifier::Ascii { .. } => "ascii",
806            PatternModifier::Wide { .. } => "wide",
807            PatternModifier::Nocase { .. } => "nocase",
808            PatternModifier::Private { .. } => "private",
809            PatternModifier::Fullword { .. } => "fullword",
810            PatternModifier::Base64 { .. } => "base64",
811            PatternModifier::Base64Wide { .. } => "base64wide",
812            PatternModifier::Xor { .. } => "xor",
813        }
814    }
815}
816
817impl Display for PatternModifier<'_> {
818    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
819        match self {
820            PatternModifier::Ascii { .. } => {
821                write!(f, "ascii")
822            }
823            PatternModifier::Wide { .. } => {
824                write!(f, "wide")
825            }
826            PatternModifier::Nocase { .. } => {
827                write!(f, "nocase")
828            }
829            PatternModifier::Private { .. } => {
830                write!(f, "private")
831            }
832            PatternModifier::Fullword { .. } => {
833                write!(f, "fullword")
834            }
835            PatternModifier::Base64 { alphabet, .. } => {
836                if let Some(alphabet) = alphabet {
837                    write!(f, "base64({})", alphabet.literal)
838                } else {
839                    write!(f, "base64")
840                }
841            }
842            PatternModifier::Base64Wide { alphabet, .. } => {
843                if let Some(alphabet) = alphabet {
844                    write!(f, "base64wide({})", alphabet.literal)
845                } else {
846                    write!(f, "base64wide")
847                }
848            }
849            PatternModifier::Xor { start, end, .. } => {
850                if *start == 0 && *end == 255 {
851                    write!(f, "xor")
852                } else if *start == *end {
853                    write!(f, "xor({start})")
854                } else {
855                    write!(f, "xor({start}-{end})")
856                }
857            }
858        }
859    }
860}
861
862/// A pattern match expression (e.g. `$a`, `$b at 0`, `$c in (0..10)`).
863#[derive(Debug)]
864pub struct PatternMatch<'src> {
865    pub identifier: Ident<'src>,
866    pub anchor: Option<MatchAnchor<'src>>,
867}
868
869/// In expressions like `$a at 0` and `$b in (0..10)`, this type represents the
870/// anchor (e.g. `at <expr>`, `in <range>`).
871///
872/// The anchor is the part of the expression that restricts the offset range
873/// where the match can occur.
874/// (e.g. `at <expr>`, `in <range>`).
875#[derive(Debug)]
876pub enum MatchAnchor<'src> {
877    At(Box<At<'src>>),
878    In(Box<In<'src>>),
879}
880
881/// In expressions like `$a at 0`, this type represents the anchor
882/// (e.g. `at <expr>`).
883#[derive(Debug)]
884pub struct At<'src> {
885    span: Span,
886    pub expr: Expr<'src>,
887}
888
889/// A pair of values conforming a range (e.g. `(0..10)`).
890#[derive(Debug)]
891pub struct Range<'src> {
892    span: Span,
893    pub lower_bound: Expr<'src>,
894    pub upper_bound: Expr<'src>,
895}
896
897/// In expressions like `$a in (0..10)`, this struct represents the anchor
898/// e.g. `in <range>`).
899#[derive(Debug)]
900pub struct In<'src> {
901    span: Span,
902    pub range: Range<'src>,
903}
904
905/// An expression representing a function call.
906#[derive(Debug)]
907pub struct FuncCall<'src> {
908    args_span: Span,
909    pub object: Option<Expr<'src>>,
910    pub identifier: Ident<'src>,
911    pub args: Vec<Expr<'src>>,
912}
913
914impl FuncCall<'_> {
915    /// Span covered by the function's arguments in the source code.
916    ///
917    /// [`FuncCall::span`] covers the whole function call, including the
918    /// function identifier and the arguments, while this covers only the
919    /// arguments.
920    pub fn args_span(&self) -> Span {
921        self.args_span.clone()
922    }
923}
924
925/// A lookup operation in an array or dictionary.
926#[derive(Debug)]
927pub struct Lookup<'src> {
928    span: Span,
929    pub primary: Expr<'src>,
930    pub index: Expr<'src>,
931}
932
933/// A literal string (e.g: `"abcd"`).
934#[derive(Debug)]
935pub struct LiteralString<'src> {
936    span: Span,
937    /// The literal string as it appears in the source code, including the
938    /// quotes.
939    pub literal: &'src str,
940    /// The value of the string literal. Escaped characters, if any, are
941    /// unescaped. Doesn't include the quotes.
942    pub value: Cow<'src, BStr>,
943}
944
945impl LiteralString<'_> {
946    pub fn as_str(&self) -> Result<&str, Utf8Error> {
947        match &self.value {
948            // SAFETY: When the literal string is borrowed from the original
949            // source code, it's safe to assume that it's valid UTF-8. This
950            // has been already checked during parsing.
951            Cow::Borrowed(s) => Ok(unsafe { s.to_str_unchecked() }),
952            // When the literal string is owned is because the original string
953            // contained some escaped character. It may contain invalid UTF-8
954            // characters.
955            Cow::Owned(s) => s.to_str(),
956        }
957    }
958}
959
960/// A literal integer (e.g: `1`, `0xAB`).
961#[derive(Debug)]
962pub struct LiteralInteger<'src> {
963    span: Span,
964    /// The literal value as it appears in the source code.
965    pub literal: &'src str,
966    /// The value of the integer literal.
967    pub value: i64,
968}
969
970/// A literal float (e.g: `2.0`, `3.14`).
971#[derive(Debug)]
972pub struct LiteralFloat<'src> {
973    span: Span,
974    /// The literal value as it appears in the source code.
975    pub literal: &'src str,
976    /// The value of the integer literal.
977    pub value: f64,
978}
979
980/// A regular expression in a YARA rule.
981///
982/// Used both as part of a [`RegexpPattern`] and as the right operand
983/// of a `matches` operator.
984#[derive(Debug)]
985pub struct Regexp<'src> {
986    span: Span,
987    /// The regular expressions as it appears in the source code, including
988    /// the opening and closing slashes (`/`), and the modifiers `i` and `s`,
989    /// if they are present.
990    pub literal: &'src str,
991    /// The regexp source code. Doesn't include the opening and closing `/`.
992    pub src: &'src str,
993    /// True if the regular expression was followed by /i
994    pub case_insensitive: bool,
995    /// True if the regular expression was followed by /s
996    pub dot_matches_new_line: bool,
997}
998
999/// An expression with a single operand.
1000#[derive(Debug)]
1001pub struct UnaryExpr<'src> {
1002    span: Span,
1003    pub operand: Expr<'src>,
1004}
1005
1006/// An expression with two operands.
1007#[derive(Debug)]
1008pub struct BinaryExpr<'src> {
1009    /// Left-hand side.
1010    pub lhs: Expr<'src>,
1011    /// Right-hand side.
1012    pub rhs: Expr<'src>,
1013}
1014
1015/// An expression with multiple operands.
1016#[derive(Debug)]
1017pub struct NAryExpr<'src> {
1018    pub operands: Vec<Expr<'src>>,
1019}
1020
1021impl<'src> NAryExpr<'src> {
1022    #[inline]
1023    pub fn operands(&self) -> Iter<'_, Expr<'src>> {
1024        self.operands.iter()
1025    }
1026
1027    #[inline]
1028    pub fn add(&mut self, expr: Expr<'src>) {
1029        self.operands.push(expr);
1030    }
1031
1032    pub fn first(&self) -> &Expr<'src> {
1033        self.operands
1034            .first()
1035            .expect("expression is expected to have at least one operand")
1036    }
1037
1038    pub fn last(&self) -> &Expr<'src> {
1039        self.operands
1040            .last()
1041            .expect("expression is expected to have at least one operand")
1042    }
1043
1044    #[inline]
1045    pub fn as_slice(&self) -> &[Expr<'src>] {
1046        self.operands.as_slice()
1047    }
1048}
1049
1050impl<'src> From<Vec<Expr<'src>>> for NAryExpr<'src> {
1051    fn from(value: Vec<Expr<'src>>) -> Self {
1052        Self { operands: value }
1053    }
1054}
1055
1056/// Trait implemented by every node in the AST that has an associated span.
1057///
1058/// [`WithSpan::span`] returns a [`Span`] that indicates the starting and ending
1059/// position of the AST node in the original source code.
1060pub trait WithSpan {
1061    /// Returns the starting and ending position within the source code for
1062    /// some node in the AST.
1063    fn span(&self) -> Span;
1064}
1065
1066impl WithSpan for LiteralString<'_> {
1067    fn span(&self) -> Span {
1068        self.span.clone()
1069    }
1070}
1071
1072impl WithSpan for LiteralInteger<'_> {
1073    fn span(&self) -> Span {
1074        self.span.clone()
1075    }
1076}
1077
1078impl WithSpan for LiteralFloat<'_> {
1079    fn span(&self) -> Span {
1080        self.span.clone()
1081    }
1082}
1083
1084impl WithSpan for Regexp<'_> {
1085    fn span(&self) -> Span {
1086        self.span.clone()
1087    }
1088}
1089
1090impl WithSpan for HexAlternative {
1091    fn span(&self) -> Span {
1092        self.span.clone()
1093    }
1094}
1095
1096impl WithSpan for HexByte {
1097    fn span(&self) -> Span {
1098        self.span.clone()
1099    }
1100}
1101
1102impl WithSpan for HexJump {
1103    fn span(&self) -> Span {
1104        self.span.clone()
1105    }
1106}
1107
1108impl WithSpan for HexToken {
1109    fn span(&self) -> Span {
1110        match self {
1111            HexToken::Byte(byte) => byte.span(),
1112            HexToken::NotByte(byte) => byte.span(),
1113            HexToken::Alternative(alt) => alt.span(),
1114            HexToken::Jump(jump) => jump.span(),
1115        }
1116    }
1117}
1118
1119impl WithSpan for HexSubPattern {
1120    fn span(&self) -> Span {
1121        let span = self.0.first().map(|t| t.span()).unwrap_or_default();
1122        if self.0.len() == 1 {
1123            return span;
1124        }
1125        span.combine(&self.0.last().map(|t| t.span()).unwrap_or_default())
1126    }
1127}
1128
1129impl WithSpan for Ident<'_> {
1130    fn span(&self) -> Span {
1131        self.span.clone()
1132    }
1133}
1134
1135impl WithSpan for IdentWithIndex<'_> {
1136    fn span(&self) -> Span {
1137        self.span.clone()
1138    }
1139}
1140
1141impl WithSpan for IdentWithRange<'_> {
1142    fn span(&self) -> Span {
1143        self.span.clone()
1144    }
1145}
1146
1147impl WithSpan for Meta<'_> {
1148    fn span(&self) -> Span {
1149        self.identifier.span.combine(&self.value.span())
1150    }
1151}
1152
1153impl WithSpan for MetaValue<'_> {
1154    fn span(&self) -> Span {
1155        match self {
1156            MetaValue::Bool((_, span))
1157            | MetaValue::Integer((_, span))
1158            | MetaValue::Float((_, span))
1159            | MetaValue::String((_, span))
1160            | MetaValue::Bytes((_, span)) => span.clone(),
1161        }
1162    }
1163}
1164
1165impl WithSpan for ForOf<'_> {
1166    fn span(&self) -> Span {
1167        self.span.clone()
1168    }
1169}
1170
1171impl WithSpan for ForIn<'_> {
1172    fn span(&self) -> Span {
1173        self.span.clone()
1174    }
1175}
1176
1177impl WithSpan for Of<'_> {
1178    fn span(&self) -> Span {
1179        self.span.clone()
1180    }
1181}
1182
1183impl WithSpan for OfItems<'_> {
1184    fn span(&self) -> Span {
1185        match self {
1186            OfItems::PatternSet(patterns) => patterns.span(),
1187            OfItems::BoolExprTuple(tuple) => tuple.span(),
1188        }
1189    }
1190}
1191
1192impl WithSpan for With<'_> {
1193    fn span(&self) -> Span {
1194        self.span.clone()
1195    }
1196}
1197
1198impl WithSpan for WithDeclaration<'_> {
1199    fn span(&self) -> Span {
1200        self.span.clone()
1201    }
1202}
1203
1204impl WithSpan for Iterable<'_> {
1205    fn span(&self) -> Span {
1206        match self {
1207            Iterable::Range(range) => range.span(),
1208            Iterable::ExprTuple(tuple) => tuple.span(),
1209            Iterable::Expr(expr) => expr.span(),
1210        }
1211    }
1212}
1213
1214impl WithSpan for Import<'_> {
1215    fn span(&self) -> Span {
1216        self.span.clone()
1217    }
1218}
1219
1220impl WithSpan for Include<'_> {
1221    fn span(&self) -> Span {
1222        self.span.clone()
1223    }
1224}
1225
1226impl WithSpan for FuncCall<'_> {
1227    fn span(&self) -> Span {
1228        self.identifier.span.combine(&self.args_span)
1229    }
1230}
1231
1232impl WithSpan for Pattern<'_> {
1233    fn span(&self) -> Span {
1234        match self {
1235            Pattern::Text(p) => p.span(),
1236            Pattern::Hex(p) => p.span(),
1237            Pattern::Regexp(p) => p.span(),
1238        }
1239    }
1240}
1241
1242impl WithSpan for TextPattern<'_> {
1243    fn span(&self) -> Span {
1244        if self.modifiers.is_empty() {
1245            self.identifier.span().combine(&self.text.span)
1246        } else {
1247            self.identifier.span().combine(&self.modifiers.span())
1248        }
1249    }
1250}
1251
1252impl WithSpan for HexPattern<'_> {
1253    fn span(&self) -> Span {
1254        self.span.clone()
1255    }
1256}
1257
1258impl WithSpan for RegexpPattern<'_> {
1259    fn span(&self) -> Span {
1260        if self.modifiers.is_empty() {
1261            self.identifier.span().combine(&self.regexp.span)
1262        } else {
1263            self.identifier.span().combine(&self.modifiers.span())
1264        }
1265    }
1266}
1267
1268impl WithSpan for Range<'_> {
1269    fn span(&self) -> Span {
1270        self.span.clone()
1271    }
1272}
1273
1274impl WithSpan for PatternSet<'_> {
1275    fn span(&self) -> Span {
1276        match self {
1277            PatternSet::Them { span } => span.clone(),
1278            PatternSet::Set(items) => {
1279                let span =
1280                    items.first().map(|item| item.span()).unwrap_or_default();
1281
1282                if items.len() == 1 {
1283                    return span;
1284                }
1285
1286                span.combine(
1287                    &items.last().map(|item| item.span()).unwrap_or_default(),
1288                )
1289            }
1290        }
1291    }
1292}
1293
1294impl WithSpan for PatternModifier<'_> {
1295    fn span(&self) -> Span {
1296        match self {
1297            PatternModifier::Ascii { span }
1298            | PatternModifier::Wide { span }
1299            | PatternModifier::Nocase { span }
1300            | PatternModifier::Private { span }
1301            | PatternModifier::Fullword { span }
1302            | PatternModifier::Base64 { span, .. }
1303            | PatternModifier::Base64Wide { span, .. }
1304            | PatternModifier::Xor { span, .. } => span.clone(),
1305        }
1306    }
1307}
1308
1309impl WithSpan for PatternModifiers<'_> {
1310    fn span(&self) -> Span {
1311        let span = self
1312            .modifiers
1313            .first()
1314            .expect("calling span() on an empty Vec<PatternModifier>")
1315            .span();
1316
1317        if self.modifiers.len() > 1 {
1318            span.combine(&self.modifiers.last().unwrap().span())
1319        } else {
1320            span
1321        }
1322    }
1323}
1324
1325impl WithSpan for PatternSetItem<'_> {
1326    fn span(&self) -> Span {
1327        self.span.clone()
1328    }
1329}
1330
1331impl WithSpan for Quantifier<'_> {
1332    fn span(&self) -> Span {
1333        match self {
1334            Quantifier::None { span } => span.clone(),
1335            Quantifier::All { span } => span.clone(),
1336            Quantifier::Any { span } => span.clone(),
1337            Quantifier::Percentage(expr) => expr.span(),
1338            Quantifier::Expr(expr) => expr.span(),
1339        }
1340    }
1341}
1342
1343impl WithSpan for UnaryExpr<'_> {
1344    fn span(&self) -> Span {
1345        self.span.clone()
1346    }
1347}
1348
1349impl WithSpan for BinaryExpr<'_> {
1350    fn span(&self) -> Span {
1351        self.lhs.span().combine(&self.rhs.span())
1352    }
1353}
1354
1355impl WithSpan for NAryExpr<'_> {
1356    fn span(&self) -> Span {
1357        self.first().span().combine(&self.last().span())
1358    }
1359}
1360
1361impl WithSpan for &Vec<Expr<'_>> {
1362    fn span(&self) -> Span {
1363        let span =
1364            self.first().expect("calling span() on an empty Vec<Expr>").span();
1365
1366        if self.len() > 1 {
1367            span.combine(&self.last().unwrap().span())
1368        } else {
1369            span
1370        }
1371    }
1372}
1373
1374impl WithSpan for PatternMatch<'_> {
1375    fn span(&self) -> Span {
1376        let mut span = self.identifier.span();
1377        if let Some(anchor) = &self.anchor {
1378            span = span.combine(&anchor.span())
1379        }
1380        span
1381    }
1382}
1383
1384impl WithSpan for MatchAnchor<'_> {
1385    fn span(&self) -> Span {
1386        match self {
1387            MatchAnchor::At(a) => a.span.clone(),
1388            MatchAnchor::In(i) => i.span.clone(),
1389        }
1390    }
1391}
1392
1393impl WithSpan for Expr<'_> {
1394    fn span(&self) -> Span {
1395        match self {
1396            Expr::False { span, .. }
1397            | Expr::True { span, .. }
1398            | Expr::Filesize { span, .. }
1399            | Expr::Entrypoint { span, .. } => span.clone(),
1400
1401            Expr::Defined(expr)
1402            | Expr::Not(expr)
1403            | Expr::Minus(expr)
1404            | Expr::BitwiseNot(expr) => expr.span(),
1405
1406            Expr::Shl(expr)
1407            | Expr::Shr(expr)
1408            | Expr::BitwiseAnd(expr)
1409            | Expr::BitwiseOr(expr)
1410            | Expr::BitwiseXor(expr)
1411            | Expr::Eq(expr)
1412            | Expr::Ne(expr)
1413            | Expr::Lt(expr)
1414            | Expr::Gt(expr)
1415            | Expr::Le(expr)
1416            | Expr::Ge(expr)
1417            | Expr::Contains(expr)
1418            | Expr::IContains(expr)
1419            | Expr::StartsWith(expr)
1420            | Expr::IStartsWith(expr)
1421            | Expr::EndsWith(expr)
1422            | Expr::IEndsWith(expr)
1423            | Expr::IEquals(expr)
1424            | Expr::Matches(expr) => expr.span(),
1425
1426            Expr::And(expr)
1427            | Expr::Or(expr)
1428            | Expr::Add(expr)
1429            | Expr::Sub(expr)
1430            | Expr::Mul(expr)
1431            | Expr::Div(expr)
1432            | Expr::Mod(expr)
1433            | Expr::FieldAccess(expr) => expr.span(),
1434
1435            Expr::LiteralString(s) => s.span.clone(),
1436            Expr::LiteralFloat(f) => f.span.clone(),
1437            Expr::LiteralInteger(i) => i.span.clone(),
1438            Expr::Ident(i) => i.span.clone(),
1439            Expr::Regexp(r) => r.span.clone(),
1440            Expr::Lookup(l) => l.span.clone(),
1441            Expr::FuncCall(f) => f.span(),
1442            Expr::PatternMatch(p) => p.span(),
1443            Expr::PatternCount(p) => p.span(),
1444            Expr::PatternLength(p) => p.span(),
1445            Expr::PatternOffset(p) => p.span(),
1446            Expr::ForOf(f) => f.span(),
1447            Expr::ForIn(f) => f.span(),
1448            Expr::Of(o) => o.span(),
1449            Expr::With(w) => w.span(),
1450        }
1451    }
1452}