pe_sigscan/pattern.rs
1//! Wildcard byte pattern type and IDA-style hex parser.
2//!
3//! Two builder paths cover the common cases:
4//!
5//! - **Compile-time / static patterns.** Use the [`crate::pattern!`] macro
6//! defined in the crate root. Produces a `&'static [Option<u8>]` with no
7//! allocation. Preferred for hard-coded signatures known at build time.
8//! - **Runtime / dynamic patterns.** Use [`Pattern::from_ida`]. Allocates a
9//! `Vec<Option<u8>>`. Preferred when the pattern is loaded from config,
10//! a dump file, an IDA copy/paste at runtime, or user input.
11//!
12//! The parser accepts the canonical IDA-style hex syntax — see the
13//! `from_ida` doc for the exact grammar.
14
15use alloc::vec::Vec;
16
17use crate::error::{ParseErrorKind, ParsePatternError};
18
/// Borrowed, wildcard-aware byte pattern: a slice of `Option<u8>` slots.
///
/// Each slot describes one byte position of the pattern:
///
/// - `Some(b)` matches the literal byte `b`;
/// - `None` matches any byte (the IDA-style `?` token).
///
/// Most callers will build patterns through the [`Pattern`] struct (for
/// runtime IDA-string parsing) or the [`crate::pattern!`] macro (for
/// compile-time constants). The raw slice type is exposed so the underlying
/// scan functions accept any source of pattern data without locking callers
/// into a particular wrapper.
pub type WildcardPattern<'a> = &'a [Option<u8>];
28
/// An owned wildcard byte pattern with parsed-from-string ergonomics.
///
/// Internally just a `Vec<Option<u8>>` for owned, runtime-built patterns.
/// For static / compile-time patterns prefer the [`crate::pattern!`] macro,
/// which produces a `&'static [Option<u8>]` with no allocation.
///
/// Equality (`PartialEq` / `Eq`) is derived, so two patterns compare equal
/// exactly when their slot sequences are identical.
///
/// # Examples
///
/// ```
/// use pe_sigscan::Pattern;
///
/// let p = Pattern::from_ida("48 8B 05 ?? ?? ?? ?? 48 89 41 08").unwrap();
/// assert_eq!(p.len(), 11);
/// assert_eq!(p.as_slice()[0], Some(0x48));
/// assert_eq!(p.as_slice()[3], None);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pattern {
    // One slot per pattern byte: `Some(b)` for a literal byte, `None` for a
    // wildcard. Kept private; read access goes through `as_slice`.
    bytes: Vec<Option<u8>>,
}
49
50impl Pattern {
51 /// Construct a pattern from an existing `Vec<Option<u8>>`.
52 ///
53 /// Useful when the pattern is built programmatically (e.g. mutating an
54 /// existing `Vec<Option<u8>>` to add wildcards before scanning).
55 #[must_use]
56 pub fn new(bytes: Vec<Option<u8>>) -> Self {
57 Self { bytes }
58 }
59
60 /// Parse an IDA-style hex pattern string.
61 ///
62 /// Accepted token shapes (case-insensitive, separated by ASCII whitespace):
63 ///
64 /// - `XX` — two hex digits, matches the literal byte `0xXX`.
65 /// - `?` or `??` — wildcard, matches any byte.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// use pe_sigscan::Pattern;
71 /// let p = Pattern::from_ida("48 8B ?? 89").unwrap();
72 /// assert_eq!(p.as_slice(), &[Some(0x48), Some(0x8B), None, Some(0x89)]);
73 /// ```
74 ///
75 /// # Errors
76 ///
77 /// Returns [`ParsePatternError`] if the input contains a token that is
78 /// neither two hex digits nor `?` / `??`, or if a token mixes `?` with
79 /// hex characters (`?A`, `1?`).
80 pub fn from_ida(s: &str) -> Result<Self, ParsePatternError> {
81 let mut bytes = Vec::new();
82 for (idx, tok) in s.split_ascii_whitespace().enumerate() {
83 let entry = match tok {
84 "?" | "??" => None,
85 _ => Some(parse_hex_byte(tok).map_err(|kind| ParsePatternError {
86 token_index: idx,
87 kind,
88 })?),
89 };
90 bytes.push(entry);
91 }
92 if bytes.is_empty() {
93 return Err(ParsePatternError {
94 token_index: 0,
95 kind: ParseErrorKind::Empty,
96 });
97 }
98 Ok(Self { bytes })
99 }
100
101 /// View the pattern as a [`WildcardPattern`]. Use this when calling the
102 /// scan functions: `find_in_text(base, pat.as_slice())`.
103 #[must_use]
104 pub fn as_slice(&self) -> WildcardPattern<'_> {
105 &self.bytes
106 }
107
108 /// Number of byte slots in the pattern (literal + wildcard).
109 #[must_use]
110 pub fn len(&self) -> usize {
111 self.bytes.len()
112 }
113
114 /// `true` if the pattern is empty.
115 #[must_use]
116 pub fn is_empty(&self) -> bool {
117 self.bytes.is_empty()
118 }
119}
120
121/// Parse a two-character hex byte. Returns the categorised error kind on
122/// failure so [`Pattern::from_ida`] can attach the token index.
123fn parse_hex_byte(tok: &str) -> Result<u8, ParseErrorKind> {
124 let bytes = tok.as_bytes();
125 if bytes.len() != 2 {
126 return Err(ParseErrorKind::InvalidLength);
127 }
128 let hi = hex_digit(bytes[0]).ok_or(ParseErrorKind::InvalidHexDigit)?;
129 let lo = hex_digit(bytes[1]).ok_or(ParseErrorKind::InvalidHexDigit)?;
130 Ok((hi << 4) | lo)
131}
132
/// Map one ASCII hex digit byte (`0-9`, `a-f`, `A-F`) to the value it
/// denotes (`0..=15`). Any other byte yields `None`.
fn hex_digit(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        return Some(b - b'0');
    }

    // Fold ASCII upper case onto lower case so a single range check covers
    // both `a-f` and `A-F`; every other byte passes through unchanged.
    let folded = b.to_ascii_lowercase();
    if (b'a'..=b'f').contains(&folded) {
        Some(folded - b'a' + 10)
    } else {
        None
    }
}
143
#[cfg(test)]
mod tests {
    //! Unit tests for [`Pattern`] parsing and its accessors.
    //!
    //! Besides the happy paths, these pin every error shape promised by the
    //! `from_ida` documentation, including tokens that mix `?` with hex
    //! digits and inputs that contain only whitespace.

    use super::*;

    #[test]
    fn parse_basic() {
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), Some(0x8B), Some(0x05)]);
    }

    #[test]
    fn parse_with_wildcards() {
        let p = Pattern::from_ida("48 ?? 05 ??").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), None, Some(0x05), None]);
    }

    #[test]
    fn parse_single_question_wildcard() {
        let p = Pattern::from_ida("48 ? 05").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), None, Some(0x05)]);
    }

    #[test]
    fn parse_case_insensitive() {
        let upper = Pattern::from_ida("AB CD EF").unwrap();
        let lower = Pattern::from_ida("ab cd ef").unwrap();
        let mixed = Pattern::from_ida("aB Cd eF").unwrap();
        assert_eq!(upper.as_slice(), lower.as_slice());
        assert_eq!(lower.as_slice(), mixed.as_slice());
    }

    #[test]
    fn parse_extra_whitespace_ok() {
        let p = Pattern::from_ida(" 48\t8B\n??\r\n05 ").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), Some(0x8B), None, Some(0x05)]);
    }

    #[test]
    fn parse_empty_errors() {
        let err = Pattern::from_ida("").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::Empty);
    }

    #[test]
    fn parse_whitespace_only_errors() {
        // Whitespace-only input produces zero tokens, so it must be reported
        // the same way as the empty string.
        let err = Pattern::from_ida(" \t\r\n ").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::Empty);
    }

    #[test]
    fn parse_invalid_hex_errors() {
        // Both nibbles invalid — exercises the high-nibble `?` propagation
        // in `parse_hex_byte` (`hex_digit(bytes[0])` returns None first).
        let err = Pattern::from_ida("48 ZZ 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_invalid_low_nibble_only() {
        // High nibble VALID (`1`), low nibble INVALID (`Z`). Exercises the
        // low-nibble `?` propagation in `parse_hex_byte` — the high-nibble
        // path succeeded, then `hex_digit(bytes[1])` returns None and the
        // error propagates. Without this test, the low-nibble `?` line is
        // unreachable by any other input shape.
        let err = Pattern::from_ida("48 1Z 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_invalid_length_errors() {
        let err = Pattern::from_ida("48 8 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidLength);
    }

    #[test]
    fn parse_overlong_token_errors() {
        // Bytes glued together without a separator are not split implicitly.
        let err = Pattern::from_ida("488B").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::InvalidLength);

        // Only `?` and `??` are wildcards; a third `?` makes the token too long.
        let err = Pattern::from_ida("48 ???").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidLength);
    }

    #[test]
    fn parse_mixed_wildcard_and_hex_errors() {
        // Nibble-level wildcards are documented as unsupported: `?A` fails on
        // the high nibble, `1?` on the low nibble.
        let err = Pattern::from_ida("48 ?A").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);

        let err = Pattern::from_ida("1? 48").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_reports_first_bad_token() {
        // Two malformed tokens: the error must describe the earlier one.
        let err = Pattern::from_ida("48 ZZ 8").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_agrees_with_new() {
        let parsed = Pattern::from_ida("48 ? 89").unwrap();
        let built = Pattern::new(alloc::vec![Some(0x48), None, Some(0x89)]);
        assert_eq!(parsed, built);
        assert_eq!(parsed.clone(), built);
    }

    #[test]
    fn pattern_new_from_vec() {
        let p = Pattern::new(alloc::vec![Some(0x48), None, Some(0x89)]);
        assert_eq!(p.len(), 3);
        assert!(!p.is_empty());
        assert_eq!(p.as_slice()[1], None);
    }

    #[test]
    fn pattern_is_empty() {
        let p = Pattern::new(alloc::vec![]);
        assert!(p.is_empty());
        assert_eq!(p.len(), 0);
    }
}