Skip to main content

pe_sigscan/
pattern.rs

//! Wildcard byte pattern type and IDA-style hex parser.
//!
//! Two builder paths cover the common cases:
//!
//! - **Compile-time / static patterns.** Use the [`crate::pattern!`] macro
//!   defined in the crate root. Produces a `&'static [Option<u8>]` with no
//!   allocation. Preferred for hard-coded signatures known at build time.
//! - **Runtime / dynamic patterns.** Use [`Pattern::from_ida`]. Allocates a
//!   `Vec<Option<u8>>`. Preferred when the pattern is loaded from config,
//!   a dump file, an IDA copy/paste at runtime, or user input.
//!
//! The parser accepts the canonical IDA-style hex syntax — see the
//! [`Pattern::from_ida`] documentation for the exact grammar.
14
15use alloc::vec::Vec;
16
17use crate::error::{ParseErrorKind, ParsePatternError};
18
/// A borrowed byte pattern in which every slot is an `Option<u8>`.
///
/// - `Some(b)` — the slot matches only the literal byte `b`.
/// - `None` — the slot matches any byte (the IDA-style `?` token).
///
/// Patterns are usually produced through the [`Pattern`] struct (parsing an
/// IDA-style string at runtime) or the [`crate::pattern!`] macro
/// (compile-time constants). The plain slice alias is public so that the
/// scan functions can take pattern data from any source rather than forcing
/// callers into one particular wrapper type.
pub type WildcardPattern<'pat> = &'pat [Option<u8>];
28
/// An owned wildcard byte pattern with parse-from-string ergonomics.
///
/// This is a thin wrapper around a `Vec<Option<u8>>`, intended for patterns
/// that are built at runtime. When the signature is known at build time the
/// [`crate::pattern!`] macro is the better fit: it yields a
/// `&'static [Option<u8>]` and performs no allocation.
///
/// # Examples
///
/// ```
/// use pe_sigscan::Pattern;
///
/// let p = Pattern::from_ida("48 8B 05 ?? ?? ?? ?? 48 89 41 08").unwrap();
/// assert_eq!(p.len(), 11);
/// assert_eq!(p.as_slice()[0], Some(0x48));
/// assert_eq!(p.as_slice()[3], None);
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Pattern {
    /// One entry per pattern slot: `Some(b)` is a literal byte, `None` a
    /// wildcard.
    bytes: Vec<Option<u8>>,
}
49
50impl Pattern {
51    /// Construct a pattern from an existing `Vec<Option<u8>>`.
52    ///
53    /// Useful when the pattern is built programmatically (e.g. mutating an
54    /// existing `Vec<Option<u8>>` to add wildcards before scanning).
55    #[must_use]
56    pub fn new(bytes: Vec<Option<u8>>) -> Self {
57        Self { bytes }
58    }
59
60    /// Parse an IDA-style hex pattern string.
61    ///
62    /// Accepted token shapes (case-insensitive, separated by ASCII whitespace):
63    ///
64    /// - `XX` — two hex digits, matches the literal byte `0xXX`.
65    /// - `?` or `??` — wildcard, matches any byte.
66    ///
67    /// # Examples
68    ///
69    /// ```
70    /// use pe_sigscan::Pattern;
71    /// let p = Pattern::from_ida("48 8B ?? 89").unwrap();
72    /// assert_eq!(p.as_slice(), &[Some(0x48), Some(0x8B), None, Some(0x89)]);
73    /// ```
74    ///
75    /// # Errors
76    ///
77    /// Returns [`ParsePatternError`] if the input contains a token that is
78    /// neither two hex digits nor `?` / `??`, or if a token mixes `?` with
79    /// hex characters (`?A`, `1?`).
80    pub fn from_ida(s: &str) -> Result<Self, ParsePatternError> {
81        let mut bytes = Vec::new();
82        for (idx, tok) in s.split_ascii_whitespace().enumerate() {
83            let entry = match tok {
84                "?" | "??" => None,
85                _ => Some(parse_hex_byte(tok).map_err(|kind| ParsePatternError {
86                    token_index: idx,
87                    kind,
88                })?),
89            };
90            bytes.push(entry);
91        }
92        if bytes.is_empty() {
93            return Err(ParsePatternError {
94                token_index: 0,
95                kind: ParseErrorKind::Empty,
96            });
97        }
98        Ok(Self { bytes })
99    }
100
101    /// View the pattern as a [`WildcardPattern`]. Use this when calling the
102    /// scan functions: `find_in_text(base, pat.as_slice())`.
103    #[must_use]
104    pub fn as_slice(&self) -> WildcardPattern<'_> {
105        &self.bytes
106    }
107
108    /// Number of byte slots in the pattern (literal + wildcard).
109    #[must_use]
110    pub fn len(&self) -> usize {
111        self.bytes.len()
112    }
113
114    /// `true` if the pattern is empty.
115    #[must_use]
116    pub fn is_empty(&self) -> bool {
117        self.bytes.is_empty()
118    }
119}
120
121/// Parse a two-character hex byte. Returns the categorised error kind on
122/// failure so [`Pattern::from_ida`] can attach the token index.
123fn parse_hex_byte(tok: &str) -> Result<u8, ParseErrorKind> {
124    let bytes = tok.as_bytes();
125    if bytes.len() != 2 {
126        return Err(ParseErrorKind::InvalidLength);
127    }
128    let hi = hex_digit(bytes[0]).ok_or(ParseErrorKind::InvalidHexDigit)?;
129    let lo = hex_digit(bytes[1]).ok_or(ParseErrorKind::InvalidHexDigit)?;
130    Ok((hi << 4) | lo)
131}
132
/// Map one ASCII hex digit byte (`0-9`, `a-f`, `A-F`) to its numeric value
/// in `0..=15`. Any other byte yields `None`.
fn hex_digit(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        return Some(b - b'0');
    }
    if b.is_ascii_hexdigit() {
        // Decimal digits were handled above, so only `a-f` / `A-F` reach
        // this point; folding to lowercase lets one subtraction cover both.
        return Some(b.to_ascii_lowercase() - b'a' + 10);
    }
    None
}
143
/// Unit tests for [`Pattern`] construction and the IDA-style parser,
/// covering accepted token shapes and every documented error case.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_basic() {
        let p = Pattern::from_ida("48 8B 05").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), Some(0x8B), Some(0x05)]);
    }

    #[test]
    fn parse_with_wildcards() {
        let p = Pattern::from_ida("48 ?? 05 ??").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), None, Some(0x05), None]);
    }

    #[test]
    fn parse_single_question_wildcard() {
        let p = Pattern::from_ida("48 ? 05").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), None, Some(0x05)]);
    }

    #[test]
    fn parse_case_insensitive() {
        let upper = Pattern::from_ida("AB CD EF").unwrap();
        let lower = Pattern::from_ida("ab cd ef").unwrap();
        let mixed = Pattern::from_ida("aB Cd eF").unwrap();
        assert_eq!(upper.as_slice(), lower.as_slice());
        assert_eq!(lower.as_slice(), mixed.as_slice());
    }

    #[test]
    fn parse_extra_whitespace_ok() {
        let p = Pattern::from_ida("  48\t8B\n??\r\n05  ").unwrap();
        assert_eq!(p.as_slice(), &[Some(0x48), Some(0x8B), None, Some(0x05)]);
    }

    #[test]
    fn parse_empty_errors() {
        let err = Pattern::from_ida("").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::Empty);
    }

    #[test]
    fn parse_whitespace_only_errors() {
        // Whitespace-only input produces no tokens, so it must be rejected
        // exactly like the empty string.
        let err = Pattern::from_ida(" \t\r\n ").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::Empty);
    }

    #[test]
    fn parse_invalid_hex_errors() {
        // Both nibbles invalid — exercises the high-nibble `?` propagation
        // in `parse_hex_byte` (`hex_digit(bytes[0])` returns None first).
        let err = Pattern::from_ida("48 ZZ 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_invalid_low_nibble_only() {
        // High nibble VALID (`1`), low nibble INVALID (`Z`). Exercises the
        // low-nibble `?` propagation in `parse_hex_byte` — the high-nibble
        // path succeeded, then `hex_digit(bytes[1])` returns None and the
        // error propagates. Without this test, the low-nibble `?` line is
        // unreachable by any other input shape.
        let err = Pattern::from_ida("48 1Z 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_mixed_wildcard_and_hex_errors() {
        // The `from_ida` docs promise that a token mixing `?` with a hex
        // character is rejected rather than treated as a partial wildcard.
        let err = Pattern::from_ida("48 ?A 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);

        let err = Pattern::from_ida("48 1? 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn parse_invalid_length_errors() {
        let err = Pattern::from_ida("48 8 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidLength);
    }

    #[test]
    fn parse_overlong_token_errors() {
        // Three hex digits glued together are not a valid byte token.
        let err = Pattern::from_ida("48 8B0 05").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidLength);

        // Only `?` and `??` are wildcards; a longer run of `?` is not.
        let err = Pattern::from_ida("???").unwrap_err();
        assert_eq!(err.token_index, 0);
        assert_eq!(err.kind, ParseErrorKind::InvalidLength);
    }

    #[test]
    fn parse_reports_first_bad_token() {
        // Two bad tokens: the error must describe the earlier one.
        let err = Pattern::from_ida("48 ZZ 8").unwrap_err();
        assert_eq!(err.token_index, 1);
        assert_eq!(err.kind, ParseErrorKind::InvalidHexDigit);
    }

    #[test]
    fn pattern_new_from_vec() {
        let p = Pattern::new(alloc::vec![Some(0x48), None, Some(0x89)]);
        assert_eq!(p.len(), 3);
        assert_eq!(p.as_slice()[1], None);
    }

    #[test]
    fn pattern_is_empty() {
        let p = Pattern::new(alloc::vec![]);
        assert!(p.is_empty());
        assert_eq!(p.len(), 0);
    }
}