Skip to main content

yara_x_parser/
lib.rs

1/*! Parses YARA source code and produces either a Concrete Syntax Tree (CST)
2or an Abstract Syntax Tree (AST).
3
4A CST (also known as a lossless syntax tree) is a structured representation of
5the source code that retains all its details, including punctuation, spacing,
6comments, etc. The CST is appropriate for traversing the source code as it
7appears in its original form.
8
Typical uses of CSTs are code formatters, documentation generators, source
code analysis tools, etc. One of the limitations of the CST is that it doesn’t
know about operator associativity or precedence rules. Expressions appear in
the CST as they are in the source code, without any attempt from the parser to
group them according to operator precedence rules.
14
On the other hand, an AST is a simplified, more abstract representation of the
code. The AST drops comments, spacing and syntactic details and focuses on the
code semantics. When building an AST, operator precedence rules are applied,
providing a more accurate representation of expressions.
19
20Deciding whether to use a CST or AST depends on the kind of problem you want to
21solve.
22 */
23
24use std::fmt::{Display, Formatter};
25use std::ops::Range;
26
27pub use parser::Parser;
28
29#[cfg(feature = "serde")]
30use serde::Serialize;
31
32pub mod ast;
33pub mod cst;
34
35mod parser;
36mod tokenizer;
37
/// A half-open range of byte offsets (`start..end`) that locates a token
/// within the source code.
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct Span(pub Range<u32>);
42
43impl From<logos::Span> for Span {
44    fn from(value: logos::Span) -> Self {
45        Self(value.start as u32..value.end as u32)
46    }
47}
48
49impl From<rowan::TextRange> for Span {
50    fn from(value: rowan::TextRange) -> Self {
51        Self(value.start().into()..value.end().into())
52    }
53}
54
55impl Display for Span {
56    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
57        write!(f, "[{}..{}]", self.start(), self.end())
58    }
59}
60
61impl Span {
62    const MAX: usize = u32::MAX as usize;
63
64    /// Offset within the source code (in bytes) were the span starts.
65    #[inline]
66    pub fn start(&self) -> usize {
67        self.0.start as usize
68    }
69
70    /// Offset within the source code (in bytes) where the span ends.
71    #[inline]
72    pub fn end(&self) -> usize {
73        self.0.end as usize
74    }
75
76    /// Returns the span as a range of byte offsets.
77    #[inline]
78    pub fn range(&self) -> Range<usize> {
79        self.0.start as usize..self.0.end as usize
80    }
81
82    /// Returns the length of the span.
83    ///
84    /// ```
85    /// # use yara_x_parser::Span;
86    /// assert_eq!(Span(0..3).len(), 3);
87    /// assert_eq!(Span(1..3).len(), 2);  
88    /// ```
89    #[inline]
90    pub fn len(&self) -> usize {
91        self.range().len()
92    }
93
94    /// Returns true of the span is empty.
95    ///
96    /// ```
97    /// # use yara_x_parser::Span;
98    /// assert!(Span(0..0).is_empty());
99    /// assert!(Span(1..1).is_empty());
100    /// assert!(!Span(1..2).is_empty());
101    /// ```
102    #[inline]
103    pub fn is_empty(&self) -> bool {
104        self.range().is_empty()
105    }
106
107    /// Returns a new [`Span`] that combines this span with `other`.
108    ///
109    /// The resulting span goes from `self.start()` to `other.end()`.
110    pub fn combine(&self, other: &Self) -> Self {
111        Self(self.0.start..other.0.end)
112    }
113
114    /// Returns true if this span completely contains `other`.
115    ///
116    /// Both the start and end of the `other` span must be within the limits of
117    /// this span.
118    ///
119    /// ```
120    /// # use yara_x_parser::Span;
121    /// assert!(Span(0..3).contains(&Span(0..2)));
122    /// assert!(Span(0..3).contains(&Span(1..3)));
123    /// assert!(Span(0..3).contains(&Span(0..3)));
124    /// assert!(!Span(0..3).contains(&Span(0..4)));
125    /// assert!(!Span(0..3).contains(&Span(3..4)));
126    /// ```
127    pub fn contains(&self, other: &Self) -> bool {
128        self.0.contains(&other.0.start)
129            && self.0.contains(&other.0.end.saturating_sub(1))
130    }
131
132    /// Returns a new [`Span`] that is a subspan of the original one.
133    ///
134    /// `start` and `end` are the starting and ending offset of the subspan,
135    /// relative to the start of the original span.
136    pub fn subspan(&self, start: usize, end: usize) -> Span {
137        assert!(start <= self.end() - self.start());
138        assert!(end <= self.end() - self.start());
139        Self(self.0.start + start as u32..self.0.start + end as u32)
140    }
141
142    /// Displace the span by adding `offset` to both the starting and
143    /// ending positions.
144    ///
145    /// ```
146    /// # use yara_x_parser::Span;
147    /// assert_eq!(Span(0..1).offset(1), Span(1..2));
148    /// assert_eq!(Span(1..2).offset(-1), Span(0..1));
149    /// ```
150    ///
151    /// # Panics
152    ///
153    /// Panics if the new span has a start or end positions that are
154    /// negative or larger than `Span::MAX`.
155    ///
156    /// ```should_panic
157    /// # use yara_x_parser::Span;
158    /// Span(0..1).offset(-1);
159    /// ```
160    pub fn offset(mut self, offset: isize) -> Self {
161        if offset.is_negative() {
162            let offset = offset.unsigned_abs() as u32;
163            self.0.start = self.0.start.checked_sub(offset).unwrap();
164            self.0.end = self.0.end.checked_sub(offset).unwrap();
165        } else {
166            let offset = offset as u32;
167            self.0.start = self.0.start.checked_add(offset).unwrap();
168            self.0.end = self.0.end.checked_add(offset).unwrap();
169        }
170        self
171    }
172}