yara_x_parser/lib.rs
1/*! Parses YARA source code and produces either a Concrete Syntax Tree (CST)
2or an Abstract Syntax Tree (AST).
3
4A CST (also known as a lossless syntax tree) is a structured representation of
5the source code that retains all its details, including punctuation, spacing,
6comments, etc. The CST is appropriate for traversing the source code as it
7appears in its original form.
8
9Typical uses of CSTs are code formatters, documentation generators, source
10code analysis tools, etc. One of the limitations of the CST is that it doesn’t
know about operator associativity or precedence rules. Expressions appear in
12the CST as they are in the source code, without any attempt from the parser to
13group them according to operator precedence rules.
14
On the other hand, an AST is a simplified, more abstract representation of the
code. The AST drops comments, spacing and syntactic details and focuses on the
17code semantics. When building an AST, operator precedence rules are applied,
18providing a more accurate representation of expressions.
19
20Deciding whether to use a CST or AST depends on the kind of problem you want to
21solve.
22 */
23
24use std::fmt::{Display, Formatter};
25use std::ops::Range;
26
27pub use parser::Parser;
28
29#[cfg(feature = "serde")]
30use serde::Serialize;
31
32pub mod ast;
33pub mod cst;
34
35mod parser;
36mod tokenizer;
37
/// Region of the source code occupied by some token, expressed as a
/// `start..end` pair of `u32` offsets.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize))]
pub struct Span(pub Range<u32>);
42
43impl From<logos::Span> for Span {
44 fn from(value: logos::Span) -> Self {
45 Self(value.start as u32..value.end as u32)
46 }
47}
48
49impl Display for Span {
50 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
51 write!(f, "[{}..{}]", self.start(), self.end())
52 }
53}
54
55impl Span {
56 const MAX: usize = u32::MAX as usize;
57
58 /// Offset within the source code (in bytes) were the span starts.
59 #[inline]
60 pub fn start(&self) -> usize {
61 self.0.start as usize
62 }
63
64 /// Offset within the source code (in bytes) where the span ends.
65 #[inline]
66 pub fn end(&self) -> usize {
67 self.0.end as usize
68 }
69
70 /// Returns the span as a range of byte offsets.
71 #[inline]
72 pub fn range(&self) -> Range<usize> {
73 self.0.start as usize..self.0.end as usize
74 }
75
76 /// Returns a new [`Span`] that combines this span with `other`.
77 ///
78 /// The resulting span goes from `self.start()` to `other.end()`.
79 pub fn combine(&self, other: &Self) -> Self {
80 Self(self.0.start..other.0.end)
81 }
82
83 /// Returns true if this span completely contains `other`.
84 ///
85 /// Both the start and end of the `other` span must be within the limits of
86 /// this span.
87 ///
88 /// ```
89 /// # use yara_x_parser::Span;
90 /// assert!(Span(0..3).contains(&Span(0..2)));
91 /// assert!(Span(0..3).contains(&Span(1..3)));
92 /// assert!(Span(0..3).contains(&Span(0..3)));
93 /// assert!(!Span(0..3).contains(&Span(0..4)));
94 /// assert!(!Span(0..3).contains(&Span(3..4)));
95 /// ```
96 pub fn contains(&self, other: &Self) -> bool {
97 self.0.contains(&other.0.start)
98 && self.0.contains(&other.0.end.saturating_sub(1))
99 }
100
101 /// Returns a new [`Span`] that is a subspan of the original one.
102 ///
103 /// `start` and `end` are the starting and ending offset of the subspan,
104 /// relative to the start of the original span.
105 pub fn subspan(&self, start: usize, end: usize) -> Span {
106 assert!(start <= self.end() - self.start());
107 assert!(end <= self.end() - self.start());
108 Self(self.0.start + start as u32..self.0.start + end as u32)
109 }
110
111 /// Displace the span by adding `offset` to both the starting and
112 /// ending positions.
113 ///
114 /// ```
115 /// # use yara_x_parser::Span;
116 /// assert_eq!(Span(0..1).offset(1), Span(1..2));
117 /// assert_eq!(Span(1..2).offset(-1), Span(0..1));
118 /// ```
119 ///
120 /// # Panics
121 ///
122 /// Panics if the new span has a start or end positions that are
123 /// negative or larger than `Span::MAX`.
124 ///
125 /// ```should_panic
126 /// # use yara_x_parser::Span;
127 /// Span(0..1).offset(-1);
128 /// ```
129 pub fn offset(mut self, offset: isize) -> Self {
130 if offset.is_negative() {
131 let offset = offset.unsigned_abs() as u32;
132 self.0.start = self.0.start.checked_sub(offset).unwrap();
133 self.0.end = self.0.end.checked_sub(offset).unwrap();
134 } else {
135 let offset = offset as u32;
136 self.0.start = self.0.start.checked_add(offset).unwrap();
137 self.0.end = self.0.end.checked_add(offset).unwrap();
138 }
139 self
140 }
141}