yara_x_parser/
lib.rs

1/*! Parses YARA source code and produces either a Concrete Syntax Tree (CST)
2or an Abstract Syntax Tree (AST).
3
4A CST (also known as a lossless syntax tree) is a structured representation of
5the source code that retains all its details, including punctuation, spacing,
6comments, etc. The CST is appropriate for traversing the source code as it
7appears in its original form.
8
9Typical uses of CSTs are code formatters, documentation generators, source
10code analysis tools, etc. One of the limitations of the CST is that it doesn’t
know about operator associativity or precedence rules. Expressions appear in
12the CST as they are in the source code, without any attempt from the parser to
13group them according to operator precedence rules.
14
On the other hand, an AST is a simplified, more abstract representation of the
code. The AST drops comments, spacing and syntactic details and focuses on the
17code semantics. When building an AST, operator precedence rules are applied,
18providing a more accurate representation of expressions.
19
20Deciding whether to use a CST or AST depends on the kind of problem you want to
21solve.
22 */
23
24use std::fmt::{Display, Formatter};
25use std::ops::Range;
26
27pub use parser::Parser;
28
29#[cfg(feature = "serde")]
30use serde::Serialize;
31
32pub mod ast;
33pub mod cst;
34
35mod parser;
36mod tokenizer;
37
/// Region of the source code occupied by some token.
///
/// The region is stored as a half-open range `start..end` whose bounds are
/// `u32` offsets into the source code.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct Span(pub Range<u32>);
42
43impl From<logos::Span> for Span {
44    fn from(value: logos::Span) -> Self {
45        Self(value.start as u32..value.end as u32)
46    }
47}
48
49impl Display for Span {
50    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51        write!(f, "[{}..{}]", self.start(), self.end())
52    }
53}
54
55impl Span {
56    const MAX: usize = u32::MAX as usize;
57
58    /// Offset within the source code (in bytes) were the span starts.
59    #[inline]
60    pub fn start(&self) -> usize {
61        self.0.start as usize
62    }
63
64    /// Offset within the source code (in bytes) where the span ends.
65    #[inline]
66    pub fn end(&self) -> usize {
67        self.0.end as usize
68    }
69
70    /// Returns the span as a range of byte offsets.
71    #[inline]
72    pub fn range(&self) -> Range<usize> {
73        self.0.start as usize..self.0.end as usize
74    }
75
76    /// Returns a new [`Span`] that combines this span with `other`.
77    ///
78    /// The resulting span goes from `self.start()` to `other.end()`.
79    pub fn combine(&self, other: &Self) -> Self {
80        Self(self.0.start..other.0.end)
81    }
82
83    /// Returns true if this span completely contains `other`.
84    ///
85    /// Both the start and end of the `other` span must be within the limits of
86    /// this span.
87    ///
88    /// ```
89    /// # use yara_x_parser::Span;
90    /// assert!(Span(0..3).contains(&Span(0..2)));
91    /// assert!(Span(0..3).contains(&Span(1..3)));
92    /// assert!(Span(0..3).contains(&Span(0..3)));
93    /// assert!(!Span(0..3).contains(&Span(0..4)));
94    /// assert!(!Span(0..3).contains(&Span(3..4)));
95    /// ```
96    pub fn contains(&self, other: &Self) -> bool {
97        self.0.contains(&other.0.start)
98            && self.0.contains(&other.0.end.saturating_sub(1))
99    }
100
101    /// Returns a new [`Span`] that is a subspan of the original one.
102    ///
103    /// `start` and `end` are the starting and ending offset of the subspan,
104    /// relative to the start of the original span.
105    pub fn subspan(&self, start: usize, end: usize) -> Span {
106        assert!(start <= self.end() - self.start());
107        assert!(end <= self.end() - self.start());
108        Self(self.0.start + start as u32..self.0.start + end as u32)
109    }
110
111    /// Displace the span by adding `offset` to both the starting and
112    /// ending positions.
113    ///
114    /// ```
115    /// # use yara_x_parser::Span;
116    /// assert_eq!(Span(0..1).offset(1), Span(1..2));
117    /// assert_eq!(Span(1..2).offset(-1), Span(0..1));
118    /// ```
119    ///
120    /// # Panics
121    ///
122    /// Panics if the new span has a start or end positions that are
123    /// negative or larger than `Span::MAX`.
124    ///
125    /// ```should_panic
126    /// # use yara_x_parser::Span;
127    /// Span(0..1).offset(-1);
128    /// ```
129    pub fn offset(mut self, offset: isize) -> Self {
130        if offset.is_negative() {
131            let offset = offset.unsigned_abs() as u32;
132            self.0.start = self.0.start.checked_sub(offset).unwrap();
133            self.0.end = self.0.end.checked_sub(offset).unwrap();
134        } else {
135            let offset = offset as u32;
136            self.0.start = self.0.start.checked_add(offset).unwrap();
137            self.0.end = self.0.end.checked_add(offset).unwrap();
138        }
139        self
140    }
141}