1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::fmt::{Display, Write};
4use std::fs::File;
5use std::io::{self, BufReader, Read};
6use std::ops::Range;
7use std::path::{Path, PathBuf};
8use std::str::FromStr;
9
10use nom::branch::alt;
11use nom::bytes::complete::is_a;
12use nom::character::complete::space0;
13use nom::error::{Error, ErrorKind};
14use nom::multi::fold_many1;
15use nom::sequence::delimited;
16use nom::{Err, Finish, IResult};
17
18use regex::bytes::RegexBuilder;
19pub use regex::bytes::{Match, Regex};
20
21use serde::de::{SeqAccess, Visitor};
22use serde::ser::SerializeSeq;
23use serde::{Deserialize, Deserializer, Serialize, Serializer};
24use thiserror::Error;
25
26use crate::common::Language;
27
/// Error returned when a textual byte pattern cannot be parsed.
#[derive(Debug, Error)]
pub enum PatternError {
    /// The parser rejected the input; carries the `nom` error kind that was reported
    /// (`ErrorKind::Complete` is used when input remains after the last pattern byte).
    #[error("cannot parse pattern: {0:?}")]
    Parse(ErrorKind),
}
33
/// A masked byte pattern together with its pre-compiled regex matchers.
#[derive(Clone)]
pub struct Pattern {
    /// Expected value of each byte; bits written as `.` in the pattern text are 0 here.
    data: Vec<u8>,
    /// Per-byte mask; a set bit means the corresponding bit of `data` is significant.
    mask: Vec<u8>,
    /// Unanchored regex: fully specified bytes are literals, every other byte is `.`.
    norm: Regex,
    /// The same regex as `norm`, anchored with a leading `^`.
    norm_pfx: Regex,
    /// Total number of set bits across `mask`.
    bits: u32,
}
42
/// ASCII codes of the lowercase hexadecimal digits, indexed by nibble value.
const HTABLE: [u8; 16] = *b"0123456789abcdef";
46
/// Value lookup for binary pattern characters, indexed by ASCII code:
/// `'1'` maps to 1 and every other byte maps to 0.
const VTABLE_02: [u8; 256] = {
    let mut table = [0u8; 256];
    table[b'1' as usize] = 0x1;
    table
};
63
/// Mask lookup for binary pattern characters, indexed by ASCII code:
/// `'.'` (wildcard) maps to 0 and every other byte maps to 1.
const MTABLE_02: [u8; 256] = {
    let mut table = [0x1u8; 256];
    table[b'.' as usize] = 0x0;
    table
};
80
/// Value lookup for hexadecimal pattern characters, indexed by ASCII code:
/// `'0'..='9'`, `'a'..='f'` and `'A'..='F'` map to their nibble value, everything else to 0.
const VTABLE_16: [u8; 256] = {
    let mut table = [0u8; 256];

    let mut digit = 0;
    while digit < 10 {
        table[b'0' as usize + digit] = digit as u8;
        digit += 1;
    }

    let mut letter = 0;
    while letter < 6 {
        table[b'A' as usize + letter] = 0xa + letter as u8;
        table[b'a' as usize + letter] = 0xa + letter as u8;
        letter += 1;
    }

    table
};
97
/// Mask lookup for hexadecimal pattern characters, indexed by ASCII code:
/// `'.'` (wildcard nibble) maps to 0 and every other byte maps to `0xf`.
const MTABLE_16: [u8; 256] = {
    let mut table = [0xfu8; 256];
    table[b'.' as usize] = 0x0;
    table
};
114
115impl Pattern {
116 fn parse_hex(input: &str) -> IResult<&str, (u8, u8)> {
117 let (input, pattern) = is_a(b"0123456789abcdefABCDEF.".as_ref())(input)?;
118 if pattern.len() != 2 {
119 return Err(Err::Error(Error::new(input, ErrorKind::Satisfy)));
120 }
121
122 let bytes = pattern.as_bytes();
123
124 let v0 = VTABLE_16[bytes[0] as usize] << 4;
125 let m0 = MTABLE_16[bytes[0] as usize] << 4;
126
127 let v1 = VTABLE_16[bytes[1] as usize];
128 let m1 = MTABLE_16[bytes[1] as usize];
129
130 let v = v0 | v1;
131 let m = m0 | m1;
132
133 Ok((input, (v, m)))
134 }
135
136 fn parse_bin(input: &str) -> IResult<&str, (u8, u8)> {
137 let (input, pattern) = is_a(b"01.".as_ref())(input)?;
138 if pattern.len() != 8 {
139 return Err(Err::Error(Error::new(input, ErrorKind::Satisfy)));
140 }
141
142 let bytes = pattern.as_bytes();
143
144 let v0 = VTABLE_02[bytes[0] as usize] << 7;
145 let m0 = MTABLE_02[bytes[0] as usize] << 7;
146
147 let v1 = VTABLE_02[bytes[1] as usize] << 6;
148 let m1 = MTABLE_02[bytes[1] as usize] << 6;
149
150 let v2 = VTABLE_02[bytes[2] as usize] << 5;
151 let m2 = MTABLE_02[bytes[2] as usize] << 5;
152
153 let v3 = VTABLE_02[bytes[3] as usize] << 4;
154 let m3 = MTABLE_02[bytes[3] as usize] << 4;
155
156 let v4 = VTABLE_02[bytes[4] as usize] << 3;
157 let m4 = MTABLE_02[bytes[4] as usize] << 3;
158
159 let v5 = VTABLE_02[bytes[5] as usize] << 2;
160 let m5 = MTABLE_02[bytes[5] as usize] << 2;
161
162 let v6 = VTABLE_02[bytes[6] as usize] << 1;
163 let m6 = MTABLE_02[bytes[6] as usize] << 1;
164
165 let v7 = VTABLE_02[bytes[7] as usize];
166 let m7 = MTABLE_02[bytes[7] as usize];
167
168 let v = v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7;
169 let m = m0 | m1 | m2 | m3 | m4 | m5 | m6 | m7;
170
171 Ok((input, (v, m)))
172 }
173
174 fn parse_one(input: &str) -> IResult<&str, (u8, u8)> {
175 alt((Self::parse_hex, Self::parse_bin))(input)
176 }
177
178 fn parse(input: &str) -> IResult<&str, Pattern> {
179 let (input, (data, mask, regex)) = fold_many1(
180 delimited(space0, Self::parse_one, space0),
181 || (Vec::new(), Vec::new(), String::from("^")),
182 |(mut acc_v, mut acc_m, mut acc_r), (v, m)| {
183 acc_v.push(v);
184 acc_m.push(m);
185 if m != 0xff {
186 acc_r.push('.');
187 } else {
188 acc_r.push_str("\\x");
189 acc_r.push(HTABLE[(v >> 4) as usize] as char);
190 acc_r.push(HTABLE[(v & 0xf) as usize] as char);
191 }
192 (acc_v, acc_m, acc_r)
193 },
194 )(input)?;
195
196 let norm = RegexBuilder::new(®ex[1..])
197 .unicode(false)
198 .dot_matches_new_line(true)
199 .build()
200 .map_err(|_| Err::Error(Error::new(input, ErrorKind::MapRes)))?;
201
202 let norm_pfx = RegexBuilder::new(®ex)
203 .unicode(false)
204 .dot_matches_new_line(true)
205 .build()
206 .map_err(|_| Err::Error(Error::new(input, ErrorKind::MapRes)))?;
207
208 let bits = mask.iter().map(|b| b.count_ones()).sum::<u32>();
209
210 Ok((
211 input,
212 Self {
213 data,
214 mask,
215 norm,
216 norm_pfx,
217 bits,
218 },
219 ))
220 }
221
222 pub fn is_match_at(&self, bytes: impl AsRef<[u8]>, offset: usize) -> bool {
223 let bytes = bytes.as_ref();
224 offset < bytes.len()
225 && bytes.len() - offset >= self.data.len()
226 && self
227 .data
228 .iter()
229 .zip(bytes.iter().zip(self.mask.iter()))
230 .all(|(v, (b, m))| *v == b & m)
231 }
232
233 pub fn is_match(&self, bytes: impl AsRef<[u8]>) -> bool {
234 self.is_match_at(bytes, 0)
235 }
236
237 pub fn matches_start(&self, bytes: impl AsRef<[u8]>) -> bool {
238 self.is_match(bytes)
239 }
240
241 pub fn matches_end(&self, bytes: impl AsRef<[u8]>) -> bool {
242 let bytes = bytes.as_ref();
243 if bytes.len() < self.data.len() {
244 return false;
245 }
246 let (_, suffix) = bytes.split_at(bytes.len() - self.data.len());
247 self.is_match(suffix)
248 }
249
250 pub fn normalised_matcher(&self) -> &Regex {
251 &self.norm
252 }
253
254 pub fn normalised_prefix_matcher(&self) -> &Regex {
255 &self.norm_pfx
256 }
257
258 pub fn find_iter<'a>(&'a self, bytes: &'a [u8]) -> impl Iterator<Item = Match<'a>> {
259 self.normalised_matcher()
260 .find_iter(bytes)
261 .filter(|m| self.is_match(m.as_bytes()))
262 }
263
264 pub fn bits(&self) -> u32 {
265 self.bits
266 }
267
268 pub fn len(&self) -> usize {
269 self.data.len()
270 }
271}
272
273impl Display for Pattern {
274 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275 for (i, (v, m)) in self.data.iter().zip(self.mask.iter()).enumerate() {
276 if i != 0 {
277 f.write_char(' ')?;
278 }
279 match m {
280 0x00 => f.write_str("..")?,
281 0x0f => {
282 f.write_char('.')?;
283 f.write_char(HTABLE[(v & 0xf) as usize] as char)?;
284 }
285 0xf0 => {
286 f.write_char(HTABLE[(v >> 4) as usize] as char)?;
287 f.write_char('.')?;
288 }
289 0xff => {
290 f.write_char(HTABLE[(v >> 4) as usize] as char)?;
291 f.write_char(HTABLE[(v & 0xf) as usize] as char)?;
292 }
293 _ => {
294 for i in (0..8).rev() {
295 if m >> i & 1 == 0 {
296 f.write_char('.')?;
297 } else {
298 f.write_char(HTABLE[(v >> i & 1) as usize] as char)?;
299 }
300 }
301 }
302 }
303 }
304 Ok(())
305 }
306}
307
308impl FromStr for Pattern {
309 type Err = PatternError;
310
311 fn from_str(s: &str) -> Result<Self, Self::Err> {
312 let (input, pattern) = Pattern::parse(s)
313 .finish()
314 .map_err(|k| PatternError::Parse(k.code))?;
315
316 if !input.is_empty() {
317 return Err(PatternError::Parse(ErrorKind::Complete));
318 }
319
320 Ok(pattern)
321 }
322}
323
324impl<'de> Deserialize<'de> for Pattern {
325 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
326 where
327 D: Deserializer<'de>,
328 {
329 let s = Cow::<str>::deserialize(deserializer)?;
330 Self::from_str(&*s).map_err(<D::Error as serde::de::Error>::custom)
331 }
332}
333
334impl Serialize for Pattern {
335 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
336 where
337 S: Serializer,
338 {
339 serializer.collect_str(self)
340 }
341}
342
343#[derive(Debug, Error)]
344pub enum PatternSetError {
345 #[error("cannot parse patterns: {0}")]
346 Parse(serde_yaml::Error),
347 #[error("cannot parse patterns from `{0}`: {1}")]
348 ParseFile(PathBuf, serde_yaml::Error),
349 #[error("cannot parse patterns from `{0}`: {1}")]
350 ReadFile(PathBuf, io::Error),
351}
352
/// A collection of pattern groups and standalone patterns for one architecture.
#[derive(Clone)]
pub struct PatternSet {
    /// Architecture these patterns apply to (the `architecture` key of the YAML document).
    architecture: Language,
    /// Entries loaded from `pattern-group` items.
    groups: Vec<PatternGroup>,
    /// Entries loaded from `pattern` items.
    patterns: Vec<PatternsWithContext>,
}
359
/// Boxed iterator over matches, each yielding the matched byte range and the context
/// attached to the pattern that matched.
pub struct PatternSetMatchIter<'a>(
    Box<dyn Iterator<Item = (Range<usize>, &'a PatternContext)> + 'a>,
);
363
364impl<'a> Iterator for PatternSetMatchIter<'a> {
365 type Item = (Range<usize>, &'a PatternContext);
366
367 fn next(&mut self) -> Option<Self::Item> {
368 self.0.next()
369 }
370}
371
372impl PatternSet {
373 pub fn from_str(input: impl AsRef<str>) -> Result<PatternSet, PatternSetError> {
374 serde_yaml::from_str(input.as_ref()).map_err(PatternSetError::Parse)
375 }
376
377 pub fn from_reader(reader: impl Read) -> Result<PatternSet, PatternSetError> {
378 serde_yaml::from_reader(reader).map_err(PatternSetError::Parse)
379 }
380
381 pub fn from_file(path: impl AsRef<Path>) -> Result<PatternSet, PatternSetError> {
382 let path = path.as_ref();
383 let file = BufReader::new(
384 File::open(path).map_err(|e| PatternSetError::ReadFile(path.to_owned(), e))?,
385 );
386 serde_yaml::from_reader(file).map_err(|e| PatternSetError::ParseFile(path.to_owned(), e))
387 }
388
389 pub fn architecture(&self) -> &Language {
390 &self.architecture
391 }
392
393 pub fn matches<'a>(&'a self, bytes: &'a [u8]) -> PatternSetMatchIter<'a> {
394 PatternSetMatchIter(Box::new(
395 self.groups
396 .iter()
397 .flat_map(|group| group.matches(bytes))
398 .chain(
399 self.patterns
400 .iter()
401 .flat_map(|pattern| pattern.matches(bytes)),
402 ),
403 ))
404 }
405}
406
407impl<'de> Deserialize<'de> for PatternSet {
408 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
409 where
410 D: Deserializer<'de>,
411 {
412 let ps = PatternSetT::deserialize(deserializer)?;
413
414 Ok(Self {
415 architecture: ps.architecture.into_owned(),
416 groups: ps.patterns.groups.into_owned(),
417 patterns: ps.patterns.patterns.into_owned(),
418 })
419 }
420}
421
422impl Serialize for PatternSet {
423 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
424 where
425 S: Serializer,
426 {
427 let t = PatternSetT {
428 architecture: Cow::Borrowed(&self.architecture),
429 patterns: PatternOrGroupSeq {
430 groups: Cow::Borrowed(&self.groups),
431 patterns: Cow::Borrowed(&self.patterns),
432 },
433 };
434
435 t.serialize(serializer)
436 }
437}
438
/// Pattern groups and standalone patterns without an associated architecture.
#[derive(Clone)]
pub struct Patterns {
    /// Entries loaded from `pattern-group` items.
    groups: Vec<PatternGroup>,
    /// Entries loaded from `pattern` items.
    patterns: Vec<PatternsWithContext>,
}
444
445impl Patterns {
446 pub fn merge(&mut self, other: Self) {
447 self.groups.extend(other.groups);
448 self.patterns.extend(other.patterns);
449 }
450
451 pub fn matches<'a>(&'a self, bytes: &'a [u8]) -> PatternSetMatchIter<'a> {
452 PatternSetMatchIter(Box::new(
453 self.groups
454 .iter()
455 .flat_map(|group| group.matches(bytes))
456 .chain(
457 self.patterns
458 .iter()
459 .flat_map(|pattern| pattern.matches(bytes)),
460 ),
461 ))
462 }
463}
464
465impl<'de> Deserialize<'de> for Patterns {
466 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
467 where
468 D: Deserializer<'de>,
469 {
470 let ps = PatternOrGroupSeq::deserialize(deserializer)?;
471
472 Ok(Self {
473 groups: ps.groups.into_owned(),
474 patterns: ps.patterns.into_owned(),
475 })
476 }
477}
478
479impl Serialize for Patterns {
480 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
481 where
482 S: Serializer,
483 {
484 let t = PatternOrGroupSeq {
485 groups: Cow::Borrowed(&self.groups),
486 patterns: Cow::Borrowed(&self.patterns),
487 };
488
489 t.serialize(serializer)
490 }
491}
492
/// One entry of a pattern sequence: a map with either a `pattern` key or a
/// `pattern-group` key (untagged, so the key present selects the variant).
#[derive(Deserialize, Serialize)]
#[serde(untagged)]
enum PatternOrGroup<'a> {
    /// `pattern: ...` entry.
    Pattern {
        pattern: Cow<'a, PatternsWithContext>,
    },
    /// `pattern-group: ...` entry.
    PatternGroup {
        #[serde(rename = "pattern-group")]
        pattern_group: Cow<'a, PatternGroup>,
    },
}
504
/// Serde helper mirroring the on-disk shape of a `PatternSet`; `Cow` fields let
/// serialisation borrow from the real `PatternSet`.
#[derive(Deserialize, Serialize)]
struct PatternSetT<'a> {
    architecture: Cow<'a, Language>,
    // Explicit bound because `PatternOrGroupSeq` has a hand-written `Deserialize` impl.
    #[serde(bound(deserialize = "PatternOrGroupSeq<'a>: Deserialize<'de>"))]
    patterns: PatternOrGroupSeq<'a>,
}
511
/// A sequence of `pattern` / `pattern-group` entries split by kind; borrowed when
/// serialising, owned when deserialised.
#[derive(Clone)]
pub(crate) struct PatternOrGroupSeq<'a> {
    groups: Cow<'a, [PatternGroup]>,
    patterns: Cow<'a, [PatternsWithContext]>,
}
517
518impl<'de> Deserialize<'de> for PatternOrGroupSeq<'_> {
519 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
520 where
521 D: Deserializer<'de>,
522 {
523 struct Visit;
524
525 impl<'de> Visitor<'de> for Visit {
526 type Value = PatternOrGroupSeq<'static>;
527
528 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
529 formatter.write_str("struct PatternOrGroupSeq")
530 }
531
532 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
533 where
534 A: SeqAccess<'de>,
535 {
536 let mut groups = Vec::new();
537 let mut patterns = Vec::new();
538
539 #[derive(Deserialize)]
540 #[serde(untagged)]
541 enum PatternOrGroupOwned {
542 Pattern {
543 pattern: PatternsWithContext,
544 },
545 PatternGroup {
546 #[serde(rename = "pattern-group")]
547 pattern_group: PatternGroup,
548 },
549 }
550
551 while let Some(pg) = seq.next_element::<PatternOrGroupOwned>()? {
552 match pg {
553 PatternOrGroupOwned::Pattern { pattern: p } => {
554 patterns.push(p);
555 }
556 PatternOrGroupOwned::PatternGroup { pattern_group: g } => {
557 groups.push(g);
558 }
559 }
560 }
561
562 Ok(PatternOrGroupSeq {
563 groups: Cow::Owned(groups),
564 patterns: Cow::Owned(patterns),
565 })
566 }
567 }
568
569 deserializer.deserialize_seq(Visit)
570 }
571}
572
573impl Serialize for PatternOrGroupSeq<'_> {
574 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
575 where
576 S: Serializer,
577 {
578 let mut ser = serializer.serialize_seq(Some(self.patterns.len() + self.groups.len()))?;
579
580 for g in self.groups.iter() {
581 ser.serialize_element(&PatternOrGroup::PatternGroup {
582 pattern_group: Cow::Borrowed(g),
583 })?;
584 }
585
586 for p in self.patterns.iter() {
587 ser.serialize_element(&PatternOrGroup::Pattern {
588 pattern: Cow::Borrowed(p),
589 })?;
590 }
591
592 ser.end()
593 }
594}
595
/// A group of "post" patterns that only count as matches when one of the "pre"
/// patterns matches the bytes immediately before them.
#[derive(Clone, Deserialize, Serialize)]
pub struct PatternGroup {
    /// Minimum combined bit count (pre + post) required for a match (`total-bits`).
    #[serde(rename = "total-bits")]
    total_bits: u32,
    /// Value of the `post-bits` key; stored and round-tripped, not consulted by `matches`.
    #[serde(rename = "post-bits")]
    post_bits: u32,
    /// Patterns searched for in the input, with the context reported on a match (`post`).
    #[serde(rename = "post")]
    post_patterns: PatternsWithContext,
    /// Patterns that must match the bytes ending where a post match starts (`pre`).
    #[serde(rename = "pre")]
    pre_patterns: Vec<Pattern>,
}
607
608impl PatternGroup {
609 pub fn matches<'a>(
610 &'a self,
611 bytes: &'a [u8],
612 ) -> impl Iterator<Item = (Range<usize>, &'a PatternContext)> + 'a {
613 let bytes = bytes.as_ref();
614
615 self.post_patterns.patterns.iter().flat_map(|pattern| {
616 pattern
617 .normalised_matcher()
618 .find_iter(bytes)
619 .filter_map(|m| {
620 if m.start() == 0 {
622 return None;
623 }
624
625 let pre_bytes = &bytes[..m.start()];
626 let post_bits = pattern.bits();
627
628 if pattern.matches_start(m.as_bytes())
629 && self.pre_patterns.iter().any(|pre_pattern| {
630 pre_pattern.matches_end(pre_bytes)
633 && pre_pattern.bits() + post_bits >= self.total_bits
634 })
635 {
636 Some((m.range(), &self.post_patterns.context))
637 } else {
638 None
639 }
640 })
641 })
642 }
643}
644
/// A list of patterns sharing one context that is reported with every match.
#[derive(Clone, Serialize)]
pub struct PatternsWithContext {
    /// Alternative patterns; each is searched independently.
    patterns: Vec<Pattern>,
    /// Context variables attached to matches of any of `patterns`.
    #[serde(default)]
    context: PatternContext,
}
651
652impl<'de> Deserialize<'de> for PatternsWithContext {
653 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
654 where
655 D: Deserializer<'de>,
656 {
657 #[derive(Deserialize)]
658 #[serde(untagged)]
659 enum PatternWithContexT {
660 Pattern(Pattern),
661 PatternWithContext {
662 patterns: Vec<Pattern>,
663 #[serde(default)]
664 context: PatternContext,
665 },
666 }
667
668 let result = match PatternWithContexT::deserialize(deserializer)? {
669 PatternWithContexT::Pattern(pattern) => Self {
670 patterns: vec![pattern],
671 context: PatternContext::default(),
672 },
673 PatternWithContexT::PatternWithContext { patterns, context } => {
674 Self { patterns, context }
675 }
676 };
677
678 Ok(result)
679 }
680}
681
682impl PatternsWithContext {
683 pub fn matches<'a>(
684 &'a self,
685 bytes: &'a [u8],
686 ) -> impl Iterator<Item = (Range<usize>, &'a PatternContext)> + 'a {
687 let bytes = bytes.as_ref();
688
689 self.patterns.iter().flat_map(|pattern| {
690 pattern
691 .normalised_matcher()
692 .find_iter(bytes)
693 .filter_map(|m| {
694 if pattern.is_match(m.as_bytes()) {
695 Some((m.range(), &self.context))
696 } else {
697 None
698 }
699 })
700 })
701 }
702
703 pub fn matches_exact<'a>(&'a self, bytes: &'a [u8]) -> bool {
704 self.patterns.iter().any(|pattern| {
705 bytes.len() == pattern.len()
706 && pattern.normalised_prefix_matcher().is_match(&bytes)
707 && pattern.is_match(bytes)
708 })
709 }
710}
711
/// Serialised form of one context variable: a `name` / `value` pair.
#[derive(Clone, Deserialize, Serialize)]
struct PatternContextItem<'a> {
    name: Cow<'a, String>,
    value: u32,
}
717
/// Context variables attached to a match, stored as a name-to-value map
/// (serialised as a sequence of `name` / `value` items).
#[derive(Clone, Default)]
#[repr(transparent)]
pub struct PatternContext(BTreeMap<String, u32>);
721
722impl PatternContext {
723 pub fn variables(&self) -> impl Iterator<Item = (&str, u32)> {
724 self.0.iter().map(|(name, value)| (&**name, *value))
725 }
726}
727
728impl<'de> Deserialize<'de> for PatternContext {
729 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
730 where
731 D: Deserializer<'de>,
732 {
733 struct Visit;
734
735 impl<'de> Visitor<'de> for Visit {
736 type Value = PatternContext;
737
738 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
739 formatter.write_str("struct PatternContext")
740 }
741
742 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
743 where
744 A: SeqAccess<'de>,
745 {
746 let mut context = BTreeMap::new();
747
748 #[derive(Deserialize)]
749 struct PatternContextItemOwned {
750 name: String,
751 value: u32,
752 }
753
754 while let Some(PatternContextItemOwned { name, value }) = seq.next_element()? {
755 context.insert(name, value);
756 }
757
758 Ok(PatternContext(context))
759 }
760 }
761
762 deserializer.deserialize_seq(Visit)
763 }
764}
765
766impl Serialize for PatternContext {
767 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
768 where
769 S: Serializer,
770 {
771 let mut seq = serializer.serialize_seq(Some(self.0.len()))?;
772 for (name, &value) in self.0.iter() {
773 seq.serialize_element(&PatternContextItem {
774 name: Cow::Borrowed(name),
775 value,
776 })?;
777 }
778 seq.end()
779 }
780}
781
782#[cfg(test)]
783mod test {
784 use std::io::Cursor;
785
786 use super::*;
787
788 const PAT_GROUP: &'static str = r#"
789post:
790 context:
791 - name: TMode
792 value: 1
793 patterns:
794 - .. b5 1....... b0
795 - .. b5 00...... 1c
796 - .. b5 .. 46
797 - .. b5 .. 01.01...
798 - .. b5 .. 68
799 - .. b5 .. 01.01... 10...... b0
800 - 1....... b5 .. af
801 - 100..... b0 .0 b5
802 - 00...... 1c .0 b5
803 - .. 01.01... .0 b5
804 - .. 68 .0 b5
805 - 2d e9 .. 0.
806 - 4d f8 04 ed
807post-bits: 16
808pre:
809 - '.......0 bd'
810 - '.......0 bd 00 00'
811 - '.......0 bd 00 bf'
812 - '.......0 bd c0 46'
813 - ff ff
814 - c0 46
815 - 70 47
816 - 70 47 00 00
817 - 70 47 c0 46
818 - 70 47 00 bf
819 - 000..... b0 .0 bd
820 - 00 bf
821 - af f3 00 80
822 - bd e8 .. 0.
823 - 46 f7
824 - 5d f8 0....... fb
825 - 5d f8 04 fb
826 - bd e8 .. 100.....
827total-bits: 32
828"#;
829
830 const PAT_WITH_CTX: &'static str = r#"
831context:
832- name: TMode
833 value: 1
834patterns:
835- .. b5 1....... b0
836- .. b5 00...... 1c
837- .. b5 .. 46
838- .. b5 .. 01.01...
839- .. b5 .. 68
840- .. b5 .. 01.01... 10...... b0
841- 1....... b5 .. af
842- 100..... b0 .0 b5
843- 00...... 1c .0 b5
844- .. 01.01... .0 b5
845- .. 68 .0 b5
846- 2d e9 .. 0.
847- 4d f8 04 ed
848"#;
849
850 const PATS_WITH_CTX: &'static str = r#"
851- pattern:
852 context:
853 - name: TMode
854 value: 1
855 patterns:
856 - .. b5 1....... b0
857 - .. b5 00...... 1c
858 - .. b5 .. 46
859 - .. b5 .. 01.01...
860 - .. b5 .. 68
861 - .. b5 .. 01.01... 10...... b0
862 - 1....... b5 .. af
863 - 100..... b0 .0 b5
864 - 00...... 1c .0 b5
865 - .. 01.01... .0 b5
866 - .. 68 .0 b5
867 - 2d e9 .. 0.
868 - 4d f8 04 ed
869- pattern-group:
870 post:
871 context:
872 - name: TMode
873 value: 1
874 patterns:
875 - .. b5 1....... b0
876 - .. b5 00...... 1c
877 - .. b5 .. 46
878 - .. b5 .. 01.01...
879 - .. b5 .. 68
880 - .. b5 .. 01.01... 10...... b0
881 - 1....... b5 .. af
882 - 100..... b0 .0 b5
883 - 00...... 1c .0 b5
884 - .. 01.01... .0 b5
885 - .. 68 .0 b5
886 - 2d e9 .. 0.
887 - 4d f8 04 ed
888 post-bits: 16
889 pre:
890 - '.......0 bd'
891 - '.......0 bd 00 00'
892 - '.......0 bd 00 bf'
893 - '.......0 bd c0 46'
894 - ff ff
895 - c0 46
896 - 70 47
897 - 70 47 00 00
898 - 70 47 c0 46
899 - 70 47 00 bf
900 - 000..... b0 .0 bd
901 - 00 bf
902 - af f3 00 80
903 - bd e8 .. 0.
904 - 46 f7
905 - 5d f8 0....... fb
906 - 5d f8 04 fb
907 - bd e8 .. 100.....
908 total-bits: 32
909"#;
910
911 const PAT: &'static str = r#"
912architecture: ARM:LE:32:*:*
913patterns:
914- pattern-group:
915 post:
916 context:
917 - name: TMode
918 value: 1
919 patterns:
920 - .. b5 1....... b0
921 - .. b5 00...... 1c
922 - .. b5 .. 46
923 - .. b5 .. 01.01...
924 - .. b5 .. 68
925 - .. b5 .. 01.01... 10...... b0
926 - 1....... b5 .. af
927 - 100..... b0 .0 b5
928 - 00...... 1c .0 b5
929 - .. 01.01... .0 b5
930 - .. 68 .0 b5
931 - 2d e9 .. 0.
932 - 4d f8 04 ed
933 post-bits: 16
934 pre:
935 - '.......0 bd'
936 - '.......0 bd 00 00'
937 - '.......0 bd 00 bf'
938 - '.......0 bd c0 46'
939 - ff ff
940 - c0 46
941 - 70 47
942 - 70 47 00 00
943 - 70 47 c0 46
944 - 70 47 00 bf
945 - 000..... b0 .0 bd
946 - 00 bf
947 - af f3 00 80
948 - bd e8 .. 0.
949 - 46 f7
950 - 5d f8 0....... fb
951 - 5d f8 04 fb
952 - bd e8 .. 100.....
953 total-bits: 32
954- pattern:
955 context:
956 - name: TMode
957 value: 0
958 patterns:
959 - .. 0. 8f e2 .. 0. 8c e2 .. 0. bc e5
960- pattern:
961 context:
962 - name: TMode
963 value: 1
964 patterns:
965 - 03 b4 01 48 01 90 01 bd
966- pattern:
967 context:
968 - name: TMode
969 value: 1
970 patterns:
971 - 10 b5
972"#;
973
974 #[test]
975 fn test_yaml_parse() -> Result<(), Box<dyn std::error::Error>> {
976 let _ = serde_yaml::from_str::<PatternGroup>(PAT_GROUP)?;
977 let _ = serde_yaml::from_str::<PatternsWithContext>(PAT_WITH_CTX)?;
978 let _ = serde_yaml::from_str::<PatternOrGroupSeq>(PATS_WITH_CTX)?;
979
980 let _ = serde_yaml::from_reader::<_, PatternSet>(Cursor::new(PAT))?;
981 let v = serde_yaml::from_str::<PatternSet>(PAT)?;
982 let _ = serde_yaml::to_string(&v)?;
983
984 Ok(())
985 }
986
987 #[test]
988 fn test_pattern_parse() -> Result<(), Box<dyn std::error::Error>> {
989 let (_, pattern) = Pattern::parse(" .f 0. 000.011. ")?;
990
991 assert_eq!(pattern.data, b"\x0f\x00\x06");
992 assert_eq!(pattern.mask, b"\x0f\xf0\xee");
993
994 assert!(pattern.norm.is_match(b"\x1f\x01\xff"));
996 assert!(pattern.norm.is_match(b"\x10\x11\x00"));
997
998 assert!(pattern.is_match(b"\x1f\x01\x17"));
1000 assert!(pattern.is_match(b"\x0f\x00\x06"));
1001 assert!(!pattern.is_match(b"\x0f\x00\x08"));
1002
1003 Ok(())
1004 }
1005}