fugue_fspec/
pattern.rs

1use std::borrow::Cow;
2use std::collections::BTreeMap;
3use std::fmt::{Display, Write};
4use std::fs::File;
5use std::io::{self, BufReader, Read};
6use std::ops::Range;
7use std::path::{Path, PathBuf};
8use std::str::FromStr;
9
10use nom::branch::alt;
11use nom::bytes::complete::is_a;
12use nom::character::complete::space0;
13use nom::error::{Error, ErrorKind};
14use nom::multi::fold_many1;
15use nom::sequence::delimited;
16use nom::{Err, Finish, IResult};
17
18use regex::bytes::RegexBuilder;
19pub use regex::bytes::{Match, Regex};
20
21use serde::de::{SeqAccess, Visitor};
22use serde::ser::SerializeSeq;
23use serde::{Deserialize, Deserializer, Serialize, Serializer};
24use thiserror::Error;
25
26use crate::common::Language;
27
28#[derive(Debug, Error)]
29pub enum PatternError {
30    #[error("cannot parse pattern: {0:?}")]
31    Parse(ErrorKind),
32}
33
34#[derive(Clone)]
35pub struct Pattern {
36    data: Vec<u8>,
37    mask: Vec<u8>,
38    norm: Regex,
39    norm_pfx: Regex,
40    bits: u32,
41}
42
/// Maps a nibble value (`0..=15`) to its lowercase ASCII hex digit.
const HTABLE: [u8; 16] = *b"0123456789abcdef";
46
/// Value lookup for one binary pattern character, indexed by its ASCII code:
/// `'1'` yields `1`, every other byte yields `0`.
const VTABLE_02: [u8; 256] = {
    let mut table = [0x0u8; 256];
    table[b'1' as usize] = 0x1;
    table
};
63
/// Mask lookup for one binary pattern character, indexed by its ASCII code:
/// `'.'` (wildcard bit) yields `0`, every other byte yields `1`.
const MTABLE_02: [u8; 256] = {
    let mut table = [0x1u8; 256];
    table[b'.' as usize] = 0x0;
    table
};
80
/// Value lookup for one hex pattern character, indexed by its ASCII code:
/// `'0'..='9'`, `'a'..='f'` and `'A'..='F'` yield their nibble value, every
/// other byte (including the `'.'` wildcard) yields `0`.
const VTABLE_16: [u8; 256] = {
    let mut table = [0x0u8; 256];
    let mut nibble = 0u8;
    while nibble < 10 {
        table[(b'0' + nibble) as usize] = nibble;
        nibble += 1;
    }
    while nibble < 16 {
        table[(b'a' + (nibble - 10)) as usize] = nibble;
        table[(b'A' + (nibble - 10)) as usize] = nibble;
        nibble += 1;
    }
    table
};
97
/// Mask lookup for one hex pattern character, indexed by its ASCII code:
/// `'.'` (wildcard nibble) yields `0x0`, every other byte yields `0xf`.
const MTABLE_16: [u8; 256] = {
    let mut table = [0xfu8; 256];
    table[b'.' as usize] = 0x0;
    table
};
114
115impl Pattern {
116    fn parse_hex(input: &str) -> IResult<&str, (u8, u8)> {
117        let (input, pattern) = is_a(b"0123456789abcdefABCDEF.".as_ref())(input)?;
118        if pattern.len() != 2 {
119            return Err(Err::Error(Error::new(input, ErrorKind::Satisfy)));
120        }
121
122        let bytes = pattern.as_bytes();
123
124        let v0 = VTABLE_16[bytes[0] as usize] << 4;
125        let m0 = MTABLE_16[bytes[0] as usize] << 4;
126
127        let v1 = VTABLE_16[bytes[1] as usize];
128        let m1 = MTABLE_16[bytes[1] as usize];
129
130        let v = v0 | v1;
131        let m = m0 | m1;
132
133        Ok((input, (v, m)))
134    }
135
136    fn parse_bin(input: &str) -> IResult<&str, (u8, u8)> {
137        let (input, pattern) = is_a(b"01.".as_ref())(input)?;
138        if pattern.len() != 8 {
139            return Err(Err::Error(Error::new(input, ErrorKind::Satisfy)));
140        }
141
142        let bytes = pattern.as_bytes();
143
144        let v0 = VTABLE_02[bytes[0] as usize] << 7;
145        let m0 = MTABLE_02[bytes[0] as usize] << 7;
146
147        let v1 = VTABLE_02[bytes[1] as usize] << 6;
148        let m1 = MTABLE_02[bytes[1] as usize] << 6;
149
150        let v2 = VTABLE_02[bytes[2] as usize] << 5;
151        let m2 = MTABLE_02[bytes[2] as usize] << 5;
152
153        let v3 = VTABLE_02[bytes[3] as usize] << 4;
154        let m3 = MTABLE_02[bytes[3] as usize] << 4;
155
156        let v4 = VTABLE_02[bytes[4] as usize] << 3;
157        let m4 = MTABLE_02[bytes[4] as usize] << 3;
158
159        let v5 = VTABLE_02[bytes[5] as usize] << 2;
160        let m5 = MTABLE_02[bytes[5] as usize] << 2;
161
162        let v6 = VTABLE_02[bytes[6] as usize] << 1;
163        let m6 = MTABLE_02[bytes[6] as usize] << 1;
164
165        let v7 = VTABLE_02[bytes[7] as usize];
166        let m7 = MTABLE_02[bytes[7] as usize];
167
168        let v = v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7;
169        let m = m0 | m1 | m2 | m3 | m4 | m5 | m6 | m7;
170
171        Ok((input, (v, m)))
172    }
173
174    fn parse_one(input: &str) -> IResult<&str, (u8, u8)> {
175        alt((Self::parse_hex, Self::parse_bin))(input)
176    }
177
178    fn parse(input: &str) -> IResult<&str, Pattern> {
179        let (input, (data, mask, regex)) = fold_many1(
180            delimited(space0, Self::parse_one, space0),
181            || (Vec::new(), Vec::new(), String::from("^")),
182            |(mut acc_v, mut acc_m, mut acc_r), (v, m)| {
183                acc_v.push(v);
184                acc_m.push(m);
185                if m != 0xff {
186                    acc_r.push('.');
187                } else {
188                    acc_r.push_str("\\x");
189                    acc_r.push(HTABLE[(v >> 4) as usize] as char);
190                    acc_r.push(HTABLE[(v & 0xf) as usize] as char);
191                }
192                (acc_v, acc_m, acc_r)
193            },
194        )(input)?;
195
196        let norm = RegexBuilder::new(&regex[1..])
197            .unicode(false)
198            .dot_matches_new_line(true)
199            .build()
200            .map_err(|_| Err::Error(Error::new(input, ErrorKind::MapRes)))?;
201
202        let norm_pfx = RegexBuilder::new(&regex)
203            .unicode(false)
204            .dot_matches_new_line(true)
205            .build()
206            .map_err(|_| Err::Error(Error::new(input, ErrorKind::MapRes)))?;
207
208        let bits = mask.iter().map(|b| b.count_ones()).sum::<u32>();
209
210        Ok((
211            input,
212            Self {
213                data,
214                mask,
215                norm,
216                norm_pfx,
217                bits,
218            },
219        ))
220    }
221
222    pub fn is_match_at(&self, bytes: impl AsRef<[u8]>, offset: usize) -> bool {
223        let bytes = bytes.as_ref();
224        offset < bytes.len()
225            && bytes.len() - offset >= self.data.len()
226            && self
227                .data
228                .iter()
229                .zip(bytes.iter().zip(self.mask.iter()))
230                .all(|(v, (b, m))| *v == b & m)
231    }
232
233    pub fn is_match(&self, bytes: impl AsRef<[u8]>) -> bool {
234        self.is_match_at(bytes, 0)
235    }
236
237    pub fn matches_start(&self, bytes: impl AsRef<[u8]>) -> bool {
238        self.is_match(bytes)
239    }
240
241    pub fn matches_end(&self, bytes: impl AsRef<[u8]>) -> bool {
242        let bytes = bytes.as_ref();
243        if bytes.len() < self.data.len() {
244            return false;
245        }
246        let (_, suffix) = bytes.split_at(bytes.len() - self.data.len());
247        self.is_match(suffix)
248    }
249
250    pub fn normalised_matcher(&self) -> &Regex {
251        &self.norm
252    }
253
254    pub fn normalised_prefix_matcher(&self) -> &Regex {
255        &self.norm_pfx
256    }
257
258    pub fn find_iter<'a>(&'a self, bytes: &'a [u8]) -> impl Iterator<Item = Match<'a>> {
259        self.normalised_matcher()
260            .find_iter(bytes)
261            .filter(|m| self.is_match(m.as_bytes()))
262    }
263
264    pub fn bits(&self) -> u32 {
265        self.bits
266    }
267
268    pub fn len(&self) -> usize {
269        self.data.len()
270    }
271}
272
273impl Display for Pattern {
274    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
275        for (i, (v, m)) in self.data.iter().zip(self.mask.iter()).enumerate() {
276            if i != 0 {
277                f.write_char(' ')?;
278            }
279            match m {
280                0x00 => f.write_str("..")?,
281                0x0f => {
282                    f.write_char('.')?;
283                    f.write_char(HTABLE[(v & 0xf) as usize] as char)?;
284                }
285                0xf0 => {
286                    f.write_char(HTABLE[(v >> 4) as usize] as char)?;
287                    f.write_char('.')?;
288                }
289                0xff => {
290                    f.write_char(HTABLE[(v >> 4) as usize] as char)?;
291                    f.write_char(HTABLE[(v & 0xf) as usize] as char)?;
292                }
293                _ => {
294                    for i in (0..8).rev() {
295                        if m >> i & 1 == 0 {
296                            f.write_char('.')?;
297                        } else {
298                            f.write_char(HTABLE[(v >> i & 1) as usize] as char)?;
299                        }
300                    }
301                }
302            }
303        }
304        Ok(())
305    }
306}
307
308impl FromStr for Pattern {
309    type Err = PatternError;
310
311    fn from_str(s: &str) -> Result<Self, Self::Err> {
312        let (input, pattern) = Pattern::parse(s)
313            .finish()
314            .map_err(|k| PatternError::Parse(k.code))?;
315
316        if !input.is_empty() {
317            return Err(PatternError::Parse(ErrorKind::Complete));
318        }
319
320        Ok(pattern)
321    }
322}
323
324impl<'de> Deserialize<'de> for Pattern {
325    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
326    where
327        D: Deserializer<'de>,
328    {
329        let s = Cow::<str>::deserialize(deserializer)?;
330        Self::from_str(&*s).map_err(<D::Error as serde::de::Error>::custom)
331    }
332}
333
334impl Serialize for Pattern {
335    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
336    where
337        S: Serializer,
338    {
339        serializer.collect_str(self)
340    }
341}
342
343#[derive(Debug, Error)]
344pub enum PatternSetError {
345    #[error("cannot parse patterns: {0}")]
346    Parse(serde_yaml::Error),
347    #[error("cannot parse patterns from `{0}`: {1}")]
348    ParseFile(PathBuf, serde_yaml::Error),
349    #[error("cannot parse patterns from `{0}`: {1}")]
350    ReadFile(PathBuf, io::Error),
351}
352
353#[derive(Clone)]
354pub struct PatternSet {
355    architecture: Language,
356    groups: Vec<PatternGroup>,
357    patterns: Vec<PatternsWithContext>,
358}
359
360pub struct PatternSetMatchIter<'a>(
361    Box<dyn Iterator<Item = (Range<usize>, &'a PatternContext)> + 'a>,
362);
363
364impl<'a> Iterator for PatternSetMatchIter<'a> {
365    type Item = (Range<usize>, &'a PatternContext);
366
367    fn next(&mut self) -> Option<Self::Item> {
368        self.0.next()
369    }
370}
371
372impl PatternSet {
373    pub fn from_str(input: impl AsRef<str>) -> Result<PatternSet, PatternSetError> {
374        serde_yaml::from_str(input.as_ref()).map_err(PatternSetError::Parse)
375    }
376
377    pub fn from_reader(reader: impl Read) -> Result<PatternSet, PatternSetError> {
378        serde_yaml::from_reader(reader).map_err(PatternSetError::Parse)
379    }
380
381    pub fn from_file(path: impl AsRef<Path>) -> Result<PatternSet, PatternSetError> {
382        let path = path.as_ref();
383        let file = BufReader::new(
384            File::open(path).map_err(|e| PatternSetError::ReadFile(path.to_owned(), e))?,
385        );
386        serde_yaml::from_reader(file).map_err(|e| PatternSetError::ParseFile(path.to_owned(), e))
387    }
388
389    pub fn architecture(&self) -> &Language {
390        &self.architecture
391    }
392
393    pub fn matches<'a>(&'a self, bytes: &'a [u8]) -> PatternSetMatchIter<'a> {
394        PatternSetMatchIter(Box::new(
395            self.groups
396                .iter()
397                .flat_map(|group| group.matches(bytes))
398                .chain(
399                    self.patterns
400                        .iter()
401                        .flat_map(|pattern| pattern.matches(bytes)),
402                ),
403        ))
404    }
405}
406
407impl<'de> Deserialize<'de> for PatternSet {
408    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
409    where
410        D: Deserializer<'de>,
411    {
412        let ps = PatternSetT::deserialize(deserializer)?;
413
414        Ok(Self {
415            architecture: ps.architecture.into_owned(),
416            groups: ps.patterns.groups.into_owned(),
417            patterns: ps.patterns.patterns.into_owned(),
418        })
419    }
420}
421
422impl Serialize for PatternSet {
423    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
424    where
425        S: Serializer,
426    {
427        let t = PatternSetT {
428            architecture: Cow::Borrowed(&self.architecture),
429            patterns: PatternOrGroupSeq {
430                groups: Cow::Borrowed(&self.groups),
431                patterns: Cow::Borrowed(&self.patterns),
432            },
433        };
434
435        t.serialize(serializer)
436    }
437}
438
439#[derive(Clone)]
440pub struct Patterns {
441    groups: Vec<PatternGroup>,
442    patterns: Vec<PatternsWithContext>,
443}
444
445impl Patterns {
446    pub fn merge(&mut self, other: Self) {
447        self.groups.extend(other.groups);
448        self.patterns.extend(other.patterns);
449    }
450
451    pub fn matches<'a>(&'a self, bytes: &'a [u8]) -> PatternSetMatchIter<'a> {
452        PatternSetMatchIter(Box::new(
453            self.groups
454                .iter()
455                .flat_map(|group| group.matches(bytes))
456                .chain(
457                    self.patterns
458                        .iter()
459                        .flat_map(|pattern| pattern.matches(bytes)),
460                ),
461        ))
462    }
463}
464
465impl<'de> Deserialize<'de> for Patterns {
466    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
467    where
468        D: Deserializer<'de>,
469    {
470        let ps = PatternOrGroupSeq::deserialize(deserializer)?;
471
472        Ok(Self {
473            groups: ps.groups.into_owned(),
474            patterns: ps.patterns.into_owned(),
475        })
476    }
477}
478
479impl Serialize for Patterns {
480    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
481    where
482        S: Serializer,
483    {
484        let t = PatternOrGroupSeq {
485            groups: Cow::Borrowed(&self.groups),
486            patterns: Cow::Borrowed(&self.patterns),
487        };
488
489        t.serialize(serializer)
490    }
491}
492
493#[derive(Deserialize, Serialize)]
494#[serde(untagged)]
495enum PatternOrGroup<'a> {
496    Pattern {
497        pattern: Cow<'a, PatternsWithContext>,
498    },
499    PatternGroup {
500        #[serde(rename = "pattern-group")]
501        pattern_group: Cow<'a, PatternGroup>,
502    },
503}
504
505#[derive(Deserialize, Serialize)]
506struct PatternSetT<'a> {
507    architecture: Cow<'a, Language>,
508    #[serde(bound(deserialize = "PatternOrGroupSeq<'a>: Deserialize<'de>"))]
509    patterns: PatternOrGroupSeq<'a>,
510}
511
512#[derive(Clone)]
513pub(crate) struct PatternOrGroupSeq<'a> {
514    groups: Cow<'a, [PatternGroup]>,
515    patterns: Cow<'a, [PatternsWithContext]>,
516}
517
518impl<'de> Deserialize<'de> for PatternOrGroupSeq<'_> {
519    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
520    where
521        D: Deserializer<'de>,
522    {
523        struct Visit;
524
525        impl<'de> Visitor<'de> for Visit {
526            type Value = PatternOrGroupSeq<'static>;
527
528            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
529                formatter.write_str("struct PatternOrGroupSeq")
530            }
531
532            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
533            where
534                A: SeqAccess<'de>,
535            {
536                let mut groups = Vec::new();
537                let mut patterns = Vec::new();
538
539                #[derive(Deserialize)]
540                #[serde(untagged)]
541                enum PatternOrGroupOwned {
542                    Pattern {
543                        pattern: PatternsWithContext,
544                    },
545                    PatternGroup {
546                        #[serde(rename = "pattern-group")]
547                        pattern_group: PatternGroup,
548                    },
549                }
550
551                while let Some(pg) = seq.next_element::<PatternOrGroupOwned>()? {
552                    match pg {
553                        PatternOrGroupOwned::Pattern { pattern: p } => {
554                            patterns.push(p);
555                        }
556                        PatternOrGroupOwned::PatternGroup { pattern_group: g } => {
557                            groups.push(g);
558                        }
559                    }
560                }
561
562                Ok(PatternOrGroupSeq {
563                    groups: Cow::Owned(groups),
564                    patterns: Cow::Owned(patterns),
565                })
566            }
567        }
568
569        deserializer.deserialize_seq(Visit)
570    }
571}
572
573impl Serialize for PatternOrGroupSeq<'_> {
574    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
575    where
576        S: Serializer,
577    {
578        let mut ser = serializer.serialize_seq(Some(self.patterns.len() + self.groups.len()))?;
579
580        for g in self.groups.iter() {
581            ser.serialize_element(&PatternOrGroup::PatternGroup {
582                pattern_group: Cow::Borrowed(g),
583            })?;
584        }
585
586        for p in self.patterns.iter() {
587            ser.serialize_element(&PatternOrGroup::Pattern {
588                pattern: Cow::Borrowed(p),
589            })?;
590        }
591
592        ser.end()
593    }
594}
595
596#[derive(Clone, Deserialize, Serialize)]
597pub struct PatternGroup {
598    #[serde(rename = "total-bits")]
599    total_bits: u32,
600    #[serde(rename = "post-bits")]
601    post_bits: u32,
602    #[serde(rename = "post")]
603    post_patterns: PatternsWithContext,
604    #[serde(rename = "pre")]
605    pre_patterns: Vec<Pattern>,
606}
607
608impl PatternGroup {
609    pub fn matches<'a>(
610        &'a self,
611        bytes: &'a [u8],
612    ) -> impl Iterator<Item = (Range<usize>, &'a PatternContext)> + 'a {
613        let bytes = bytes.as_ref();
614
615        self.post_patterns.patterns.iter().flat_map(|pattern| {
616            pattern
617                .normalised_matcher()
618                .find_iter(bytes)
619                .filter_map(|m| {
620                    // impossible to match pre-pattern in this case
621                    if m.start() == 0 {
622                        return None;
623                    }
624
625                    let pre_bytes = &bytes[..m.start()];
626                    let post_bits = pattern.bits();
627
628                    if pattern.matches_start(m.as_bytes())
629                        && self.pre_patterns.iter().any(|pre_pattern| {
630                            // matches pre-pattern, and also matches at least total bits between
631                            // pre- and post-patterns
632                            pre_pattern.matches_end(pre_bytes)
633                                && pre_pattern.bits() + post_bits >= self.total_bits
634                        })
635                    {
636                        Some((m.range(), &self.post_patterns.context))
637                    } else {
638                        None
639                    }
640                })
641        })
642    }
643}
644
645#[derive(Clone, Serialize)]
646pub struct PatternsWithContext {
647    patterns: Vec<Pattern>,
648    #[serde(default)]
649    context: PatternContext,
650}
651
652impl<'de> Deserialize<'de> for PatternsWithContext {
653    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
654    where
655        D: Deserializer<'de>,
656    {
657        #[derive(Deserialize)]
658        #[serde(untagged)]
659        enum PatternWithContexT {
660            Pattern(Pattern),
661            PatternWithContext {
662                patterns: Vec<Pattern>,
663                #[serde(default)]
664                context: PatternContext,
665            },
666        }
667
668        let result = match PatternWithContexT::deserialize(deserializer)? {
669            PatternWithContexT::Pattern(pattern) => Self {
670                patterns: vec![pattern],
671                context: PatternContext::default(),
672            },
673            PatternWithContexT::PatternWithContext { patterns, context } => {
674                Self { patterns, context }
675            }
676        };
677
678        Ok(result)
679    }
680}
681
682impl PatternsWithContext {
683    pub fn matches<'a>(
684        &'a self,
685        bytes: &'a [u8],
686    ) -> impl Iterator<Item = (Range<usize>, &'a PatternContext)> + 'a {
687        let bytes = bytes.as_ref();
688
689        self.patterns.iter().flat_map(|pattern| {
690            pattern
691                .normalised_matcher()
692                .find_iter(bytes)
693                .filter_map(|m| {
694                    if pattern.is_match(m.as_bytes()) {
695                        Some((m.range(), &self.context))
696                    } else {
697                        None
698                    }
699                })
700        })
701    }
702
703    pub fn matches_exact<'a>(&'a self, bytes: &'a [u8]) -> bool {
704        self.patterns.iter().any(|pattern| {
705            bytes.len() == pattern.len()
706                && pattern.normalised_prefix_matcher().is_match(&bytes)
707                && pattern.is_match(bytes)
708        })
709    }
710}
711
712#[derive(Clone, Deserialize, Serialize)]
713struct PatternContextItem<'a> {
714    name: Cow<'a, String>,
715    value: u32,
716}
717
/// Context variables attached to a pattern, keyed by name.
#[derive(Clone, Default)]
#[repr(transparent)]
pub struct PatternContext(BTreeMap<String, u32>);
721
722impl PatternContext {
723    pub fn variables(&self) -> impl Iterator<Item = (&str, u32)> {
724        self.0.iter().map(|(name, value)| (&**name, *value))
725    }
726}
727
728impl<'de> Deserialize<'de> for PatternContext {
729    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
730    where
731        D: Deserializer<'de>,
732    {
733        struct Visit;
734
735        impl<'de> Visitor<'de> for Visit {
736            type Value = PatternContext;
737
738            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
739                formatter.write_str("struct PatternContext")
740            }
741
742            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
743            where
744                A: SeqAccess<'de>,
745            {
746                let mut context = BTreeMap::new();
747
748                #[derive(Deserialize)]
749                struct PatternContextItemOwned {
750                    name: String,
751                    value: u32,
752                }
753
754                while let Some(PatternContextItemOwned { name, value }) = seq.next_element()? {
755                    context.insert(name, value);
756                }
757
758                Ok(PatternContext(context))
759            }
760        }
761
762        deserializer.deserialize_seq(Visit)
763    }
764}
765
766impl Serialize for PatternContext {
767    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
768    where
769        S: Serializer,
770    {
771        let mut seq = serializer.serialize_seq(Some(self.0.len()))?;
772        for (name, &value) in self.0.iter() {
773            seq.serialize_element(&PatternContextItem {
774                name: Cow::Borrowed(name),
775                value,
776            })?;
777        }
778        seq.end()
779    }
780}
781
#[cfg(test)]
mod test {
    use std::io::Cursor;

    use super::*;

    /// A single pattern group (pre/post patterns with bit thresholds).
    const PAT_GROUP: &str = r#"
post:
  context:
  - name: TMode
    value: 1
  patterns:
  - .. b5 1....... b0
  - .. b5 00...... 1c
  - .. b5 .. 46
  - .. b5 .. 01.01...
  - .. b5 .. 68
  - .. b5 .. 01.01... 10...... b0
  - 1....... b5 .. af
  - 100..... b0 .0 b5
  - 00...... 1c .0 b5
  - .. 01.01... .0 b5
  - .. 68 .0 b5
  - 2d e9 .. 0.
  - 4d f8 04 ed
post-bits: 16
pre:
  - '.......0 bd'
  - '.......0 bd 00 00'
  - '.......0 bd 00 bf'
  - '.......0 bd c0 46'
  - ff ff
  - c0 46
  - 70 47
  - 70 47 00 00
  - 70 47 c0 46
  - 70 47 00 bf
  - 000..... b0 .0 bd
  - 00 bf
  - af f3 00 80
  - bd e8 .. 0.
  - 46 f7
  - 5d f8 0....... fb
  - 5d f8 04 fb
  - bd e8 .. 100.....
total-bits: 32
"#;

    /// A pattern list with an explicit context.
    const PAT_WITH_CTX: &str = r#"
context:
- name: TMode
  value: 1
patterns:
- .. b5 1....... b0
- .. b5 00...... 1c
- .. b5 .. 46
- .. b5 .. 01.01...
- .. b5 .. 68
- .. b5 .. 01.01... 10...... b0
- 1....... b5 .. af
- 100..... b0 .0 b5
- 00...... 1c .0 b5
- .. 01.01... .0 b5
- .. 68 .0 b5
- 2d e9 .. 0.
- 4d f8 04 ed
"#;

    /// A mixed sequence holding one `pattern` and one `pattern-group` entry.
    const PATS_WITH_CTX: &str = r#"
- pattern:
    context:
    - name: TMode
      value: 1
    patterns:
    - .. b5 1....... b0
    - .. b5 00...... 1c
    - .. b5 .. 46
    - .. b5 .. 01.01...
    - .. b5 .. 68
    - .. b5 .. 01.01... 10...... b0
    - 1....... b5 .. af
    - 100..... b0 .0 b5
    - 00...... 1c .0 b5
    - .. 01.01... .0 b5
    - .. 68 .0 b5
    - 2d e9 .. 0.
    - 4d f8 04 ed
- pattern-group:
    post:
      context:
      - name: TMode
        value: 1
      patterns:
      - .. b5 1....... b0
      - .. b5 00...... 1c
      - .. b5 .. 46
      - .. b5 .. 01.01...
      - .. b5 .. 68
      - .. b5 .. 01.01... 10...... b0
      - 1....... b5 .. af
      - 100..... b0 .0 b5
      - 00...... 1c .0 b5
      - .. 01.01... .0 b5
      - .. 68 .0 b5
      - 2d e9 .. 0.
      - 4d f8 04 ed
    post-bits: 16
    pre:
      - '.......0 bd'
      - '.......0 bd 00 00'
      - '.......0 bd 00 bf'
      - '.......0 bd c0 46'
      - ff ff
      - c0 46
      - 70 47
      - 70 47 00 00
      - 70 47 c0 46
      - 70 47 00 bf
      - 000..... b0 .0 bd
      - 00 bf
      - af f3 00 80
      - bd e8 .. 0.
      - 46 f7
      - 5d f8 0....... fb
      - 5d f8 04 fb
      - bd e8 .. 100.....
    total-bits: 32
"#;

    /// A complete pattern set: architecture plus mixed entries.
    const PAT: &str = r#"
architecture: ARM:LE:32:*:*
patterns:
- pattern-group:
    post:
      context:
      - name: TMode
        value: 1
      patterns:
      - .. b5 1....... b0
      - .. b5 00...... 1c
      - .. b5 .. 46
      - .. b5 .. 01.01...
      - .. b5 .. 68
      - .. b5 .. 01.01... 10...... b0
      - 1....... b5 .. af
      - 100..... b0 .0 b5
      - 00...... 1c .0 b5
      - .. 01.01... .0 b5
      - .. 68 .0 b5
      - 2d e9 .. 0.
      - 4d f8 04 ed
    post-bits: 16
    pre:
      - '.......0 bd'
      - '.......0 bd 00 00'
      - '.......0 bd 00 bf'
      - '.......0 bd c0 46'
      - ff ff
      - c0 46
      - 70 47
      - 70 47 00 00
      - 70 47 c0 46
      - 70 47 00 bf
      - 000..... b0 .0 bd
      - 00 bf
      - af f3 00 80
      - bd e8 .. 0.
      - 46 f7
      - 5d f8 0....... fb
      - 5d f8 04 fb
      - bd e8 .. 100.....
    total-bits: 32
- pattern:
    context:
    - name: TMode
      value: 0
    patterns:
    - .. 0. 8f e2 .. 0. 8c e2 .. 0. bc e5
- pattern:
    context:
    - name: TMode
      value: 1
    patterns:
    - 03 b4 01 48 01 90 01 bd
- pattern:
    context:
    - name: TMode
      value: 1
    patterns:
    - 10 b5
"#;

    /// Every fixture deserializes, and a full set can be serialized again.
    #[test]
    fn test_yaml_parse() -> Result<(), Box<dyn std::error::Error>> {
        serde_yaml::from_str::<PatternGroup>(PAT_GROUP)?;
        serde_yaml::from_str::<PatternsWithContext>(PAT_WITH_CTX)?;
        serde_yaml::from_str::<PatternOrGroupSeq>(PATS_WITH_CTX)?;

        serde_yaml::from_reader::<_, PatternSet>(Cursor::new(PAT))?;

        let set: PatternSet = serde_yaml::from_str(PAT)?;
        serde_yaml::to_string(&set)?;

        Ok(())
    }

    /// Mixed hex/binary syntax yields the expected data and mask, and matching
    /// honours the mask.
    #[test]
    fn test_pattern_parse() -> Result<(), Box<dyn std::error::Error>> {
        let (_, parsed) = Pattern::parse("  .f 0. 000.011. ")?;

        assert_eq!(parsed.data, b"\x0f\x00\x06");
        assert_eq!(parsed.mask, b"\x0f\xf0\xee");

        // anything will match the regex
        for sample in [&b"\x1f\x01\xff"[..], &b"\x10\x11\x00"[..]] {
            assert!(parsed.norm.is_match(sample));
        }

        // exact matches
        for sample in [&b"\x1f\x01\x17"[..], &b"\x0f\x00\x06"[..]] {
            assert!(parsed.is_match(sample));
        }
        assert!(!parsed.is_match(b"\x0f\x00\x08"));

        Ok(())
    }
}