Skip to main content

yara_x/
models.rs

1use std::ops::Range;
2use std::slice::Iter;
3
4use bstr::{BStr, ByteSlice};
5use serde::ser::SerializeStruct;
6use serde::{Deserialize, Serialize, Serializer};
7
8use crate::compiler::{IdentId, PatternId, PatternInfo, RuleInfo};
9use crate::scanner::{ScanContext, ScanState};
10use crate::{Rules, compiler, scanner};
11
12/// Kinds of patterns.
13#[derive(Serialize, Deserialize, Clone, Copy)]
14pub enum PatternKind {
15    /// The pattern is a plain text string.
16    Text,
17    /// The pattern is a hex pattern (e.g: { 01 02 03 })
18    Hex,
19    /// The pattern is a regular expression.
20    Regexp,
21}
22
23/// A structure that describes a rule.
24pub struct Rule<'a, 'r> {
25    pub(crate) ctx: Option<&'a ScanContext<'r, 'a>>,
26    pub(crate) rules: &'r Rules,
27    pub(crate) rule_info: &'r RuleInfo,
28}
29
30impl<'a, 'r> Rule<'a, 'r> {
31    /// Returns the rule's name.
32    pub fn identifier(&self) -> &'r str {
33        self.rules.ident_pool().get(self.rule_info.ident_id).unwrap()
34    }
35
36    /// Returns the rule's namespace.
37    pub fn namespace(&self) -> &'r str {
38        self.rules.ident_pool().get(self.rule_info.namespace_ident_id).unwrap()
39    }
40
41    /// Returns the metadata associated to this rule.
42    pub fn metadata(&self) -> Metadata<'a, 'r> {
43        Metadata {
44            rules: self.rules,
45            iterator: self.rule_info.metadata.iter(),
46            len: self.rule_info.metadata.len(),
47        }
48    }
49
50    /// Returns true if the rule is global.
51    pub fn is_global(&self) -> bool {
52        self.rule_info.is_global
53    }
54
55    /// Returns true if the rule is private.
56    pub fn is_private(&self) -> bool {
57        self.rule_info.is_private
58    }
59
60    /// Returns the tags associated to this rule.
61    pub fn tags(&self) -> Tags<'a, 'r> {
62        Tags {
63            rules: self.rules,
64            iterator: self.rule_info.tags.iter(),
65            len: self.rule_info.tags.len(),
66        }
67    }
68
69    /// Returns an iterator over the patterns defined for this rule.
70    ///
71    /// By default, the iterator yields only public patterns. Use
72    /// [`Patterns::include_private`] if you want to include private patterns
73    /// as well.
74    pub fn patterns(&self) -> Patterns<'a, 'r> {
75        Patterns {
76            ctx: self.ctx,
77            rules: self.rules,
78            include_private: false,
79            iterator: self.rule_info.patterns.iter(),
80            len_non_private: self.rule_info.patterns.len()
81                - self.rule_info.num_private_patterns,
82            len_private: self.rule_info.num_private_patterns,
83        }
84    }
85}
86
87impl Serialize for Rule<'_, '_> {
88    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
89    where
90        S: Serializer,
91    {
92        let mut s = serializer.serialize_struct("rule", 7)?;
93
94        s.serialize_field("identifier", &self.identifier())?;
95        s.serialize_field("namespace", &self.namespace())?;
96        s.serialize_field("is_global", &self.is_global())?;
97        s.serialize_field("is_private", &self.is_private())?;
98
99        let metadata: Vec<_> = self.metadata().collect();
100        s.serialize_field("metadata", &metadata)?;
101
102        let tags: Vec<_> = self.tags().collect();
103        s.serialize_field("tags", &tags)?;
104
105        let patterns: Vec<_> = self.patterns().include_private(true).collect();
106        s.serialize_field("patterns", &patterns)?;
107
108        s.end()
109    }
110}
111
112/// A metadata value.
113#[derive(Debug, PartialEq, Serialize)]
114#[serde(untagged)]
115pub enum MetaValue<'r> {
116    /// Integer value.
117    Integer(i64),
118    /// Float value.
119    Float(f64),
120    /// Bool value.
121    Bool(bool),
122    /// A valid UTF-8 string.
123    String(&'r str),
124    /// An arbitrary string. Used when the value contains invalid UTF-8
125    /// characters.
126    Bytes(&'r BStr),
127}
128
129/// Iterator that returns the metadata associated to a rule.
130///
131/// The iterator returns (`&str`, [`MetaValue`]) pairs, where the first item
132/// is the identifier, and the second one the metadata value.
133pub struct Metadata<'a, 'r> {
134    rules: &'r Rules,
135    iterator: Iter<'a, (IdentId, compiler::MetaValue)>,
136    len: usize,
137}
138
139impl<'r> Metadata<'_, 'r> {
140    /// Returns the metadata as a [`serde_json::Value`].
141    ///
142    /// The returned value is an array of tuples `(ident, value)` with all
143    /// the metadata associated to the rule.
144    ///
145    /// ```rust
146    /// # use yara_x;
147    /// let rules = yara_x::compile(r#"
148    /// rule test {
149    ///   meta:
150    ///     some_int = 1
151    ///     some_bool = true
152    ///     some_str = "foo"
153    ///     some_bytes = "\x01\x02\x03"
154    ///   condition:
155    ///     true
156    /// }
157    /// "#).unwrap();
158    ///
159    /// let mut scanner = yara_x::Scanner::new(&rules);
160    ///
161    /// let scan_results = scanner
162    ///     .scan(&[])
163    ///     .unwrap();
164    ///
165    /// let matching_rule = scan_results
166    ///     .matching_rules()
167    ///     .next()
168    ///     .unwrap();
169    ///
170    /// assert_eq!(
171    ///     matching_rule.metadata().into_json(),
172    ///     serde_json::json!([
173    ///         ("some_int", 1),
174    ///         ("some_bool", true),
175    ///         ("some_str", "foo"),
176    ///         ("some_bytes", [0x01, 0x02, 0x03]),
177    ///     ])
178    /// );
179    /// ```
180    pub fn into_json(self) -> serde_json::Value {
181        let v: Vec<(&'r str, MetaValue<'r>)> = self.collect();
182        serde_json::value::to_value(v).unwrap()
183    }
184
185    /// Returns `true` if the rule doesn't have any metadata.
186    #[inline]
187    pub fn is_empty(&self) -> bool {
188        self.iterator.len() == 0
189    }
190}
191
192impl<'r> Iterator for Metadata<'_, 'r> {
193    type Item = (&'r str, MetaValue<'r>);
194
195    fn next(&mut self) -> Option<Self::Item> {
196        let (ident_id, value) = self.iterator.next()?;
197
198        let ident = self.rules.ident_pool().get(*ident_id).unwrap();
199
200        let value = match value {
201            compiler::MetaValue::Bool(b) => MetaValue::Bool(*b),
202            compiler::MetaValue::Integer(i) => MetaValue::Integer(*i),
203            compiler::MetaValue::Float(f) => MetaValue::Float(*f),
204            compiler::MetaValue::String(id) => {
205                let s = self.rules.lit_pool().get(*id).unwrap();
206                // We can be sure that s is a valid UTF-8 string, because
207                // the type of meta is MetaValue::String.
208                let s = unsafe { s.to_str_unchecked() };
209                MetaValue::String(s)
210            }
211            compiler::MetaValue::Bytes(id) => {
212                MetaValue::Bytes(self.rules.lit_pool().get(*id).unwrap())
213            }
214        };
215
216        Some((ident, value))
217    }
218}
219
220impl ExactSizeIterator for Metadata<'_, '_> {
221    #[inline]
222    fn len(&self) -> usize {
223        self.len
224    }
225}
226
227/// An iterator that returns the tags defined by a rule.
228pub struct Tags<'a, 'r> {
229    rules: &'r Rules,
230    iterator: Iter<'a, IdentId>,
231    len: usize,
232}
233
234impl Tags<'_, '_> {
235    /// Returns `true` if the rule doesn't have any tags.
236    #[inline]
237    pub fn is_empty(&self) -> bool {
238        self.iterator.len() == 0
239    }
240}
241
242impl<'r> Iterator for Tags<'_, 'r> {
243    type Item = Tag<'r>;
244
245    fn next(&mut self) -> Option<Self::Item> {
246        let ident_id = self.iterator.next()?;
247        Some(Tag { rules: self.rules, ident_id: *ident_id })
248    }
249}
250
251impl ExactSizeIterator for Tags<'_, '_> {
252    #[inline]
253    fn len(&self) -> usize {
254        self.len
255    }
256}
257
258/// Represents a tag defined by a rule.
259pub struct Tag<'r> {
260    rules: &'r Rules,
261    ident_id: IdentId,
262}
263
264impl<'r> Tag<'r> {
265    /// Returns the tag's identifier.
266    pub fn identifier(&self) -> &'r str {
267        self.rules.ident_pool().get(self.ident_id).unwrap()
268    }
269}
270
271impl<'r> Serialize for Tag<'r> {
272    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
273    where
274        S: Serializer,
275    {
276        serializer.serialize_str(self.identifier())
277    }
278}
279
280/// An iterator that returns the patterns defined by a rule.
281///
282/// By default, the iterator yields only public patterns. Use
283/// [`Patterns::include_private`] if you want to include private patterns
284/// as well.
285pub struct Patterns<'a, 'r> {
286    ctx: Option<&'a ScanContext<'r, 'a>>,
287    rules: &'r Rules,
288    iterator: Iter<'a, PatternInfo>,
289    /// True if the iterator should yield all patterns, including the
290    /// private ones. If false, only the non-private patterns are
291    /// yielded.
292    include_private: bool,
293    /// Number of private patterns that remain to be yielded.
294    len_private: usize,
295    /// Number of non-private patterns that remain to be yielded.
296    len_non_private: usize,
297}
298
299impl Patterns<'_, '_> {
300    /// Specifies whether the iterator should yield private patterns.
301    ///
302    /// This does not reset the iterator to its initial state, the iterator will
303    /// continue from its current position.
304    pub fn include_private(mut self, yes: bool) -> Self {
305        self.include_private = yes;
306        self
307    }
308}
309
310impl ExactSizeIterator for Patterns<'_, '_> {
311    #[inline]
312    fn len(&self) -> usize {
313        if self.include_private {
314            self.len_non_private + self.len_private
315        } else {
316            self.len_non_private
317        }
318    }
319}
320
321impl<'a, 'r> Iterator for Patterns<'a, 'r> {
322    type Item = Pattern<'a, 'r>;
323
324    fn next(&mut self) -> Option<Self::Item> {
325        loop {
326            let pattern = self.iterator.next()?;
327
328            if pattern.is_private {
329                self.len_private -= 1;
330            } else {
331                self.len_non_private -= 1;
332            }
333
334            if self.include_private || !pattern.is_private {
335                return Some(Pattern {
336                    ctx: self.ctx,
337                    rules: self.rules,
338                    ident_id: pattern.ident_id,
339                    pattern_id: pattern.pattern_id,
340                    kind: pattern.kind,
341                    is_private: pattern.is_private,
342                });
343            }
344        }
345    }
346}
347
348/// Represents a pattern defined by a rule.
349pub struct Pattern<'a, 'r> {
350    ctx: Option<&'a ScanContext<'r, 'a>>,
351    rules: &'r Rules,
352    ident_id: IdentId,
353    pattern_id: PatternId,
354    kind: PatternKind,
355    is_private: bool,
356}
357
358impl<'a, 'r> Pattern<'a, 'r> {
359    /// Returns the pattern's identifier (e.g: $a, $b).
360    pub fn identifier(&self) -> &'r str {
361        self.rules.ident_pool().get(self.ident_id).unwrap()
362    }
363
364    /// Returns the kind of this pattern.
365    #[inline]
366    pub fn kind(&self) -> PatternKind {
367        self.kind
368    }
369
370    /// Returns true if the pattern is private.
371    #[inline]
372    pub fn is_private(&self) -> bool {
373        self.is_private
374    }
375
376    /// Returns the matches found for this pattern.
377    pub fn matches(&self) -> Matches<'a, 'r> {
378        Matches {
379            ctx: self.ctx,
380            iterator: self.ctx.and_then(|ctx| {
381                ctx.pattern_matches
382                    .get(self.pattern_id)
383                    .map(|matches| matches.iter())
384            }),
385        }
386    }
387}
388
389impl<'a, 'r> Serialize for Pattern<'a, 'r> {
390    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
391    where
392        S: Serializer,
393    {
394        let mut s = serializer.serialize_struct("pattern", 4)?;
395        s.serialize_field("identifier", &self.identifier())?;
396        s.serialize_field("kind", &self.kind())?;
397        s.serialize_field("is_private", &self.is_private())?;
398        let matches: Vec<_> = self.matches().collect();
399        s.serialize_field("matches", &matches)?;
400        s.end()
401    }
402}
403
404/// Iterator that returns the matches for a pattern.
405pub struct Matches<'a, 'r> {
406    ctx: Option<&'a ScanContext<'r, 'a>>,
407    iterator: Option<Iter<'a, scanner::Match>>,
408}
409
410impl<'a, 'r> Iterator for Matches<'a, 'r> {
411    type Item = Match<'a, 'r>;
412
413    fn next(&mut self) -> Option<Self::Item> {
414        let iter = self.iterator.as_mut()?;
415        Some(Match { ctx: self.ctx?, inner: iter.next()? })
416    }
417}
418
419impl ExactSizeIterator for Matches<'_, '_> {
420    fn len(&self) -> usize {
421        self.iterator.as_ref().map_or(0, |it| it.len())
422    }
423}
424
425/// Represents a match.
426pub struct Match<'a, 'r> {
427    ctx: &'a ScanContext<'r, 'a>,
428    inner: &'a scanner::Match,
429}
430
431impl<'a> Match<'a, '_> {
432    /// Range within the original data where the match occurred.
433    #[inline]
434    pub fn range(&self) -> Range<usize> {
435        self.inner.range.clone()
436    }
437
438    /// Slice containing the data that matched.
439    #[inline]
440    pub fn data(&self) -> &'a [u8] {
441        let data = match &self.ctx.scan_state {
442            ScanState::Finished(snippets) => snippets.get(self.range()),
443            _ => None,
444        };
445
446        data.unwrap()
447    }
448
449    /// XOR key used for decrypting the data if the pattern had the `xor`
450    /// modifier, or `None` if otherwise.
451    #[inline]
452    pub fn xor_key(&self) -> Option<u8> {
453        self.inner.xor_key
454    }
455}
456
457impl<'a, 'r> Serialize for Match<'a, 'r> {
458    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
459    where
460        S: Serializer,
461    {
462        let mut s = serializer.serialize_struct("match", 2)?;
463        s.serialize_field("range", &self.range())?;
464        s.serialize_field("xor_key", &self.xor_key())?;
465        s.end()
466    }
467}