Skip to main content

pe_sigscan/
lib.rs

1//! # pe-sigscan
2//!
3//! Fast in-process byte-pattern ("signature") scanning over the executable
4//! sections of a loaded PE (Portable Executable) module on Windows.
5//!
6//! This crate is a building block for game mods, hookers, debuggers, and any
7//! other in-process tool that needs to locate non-exported, non-vtable-
8//! accessible code by its byte signature. It mirrors the workflow common
9//! across the reverse-engineering ecosystem — derive a pattern from a
10//! disassembler (IDA, Ghidra, Binary Ninja, Cutter), then scan the live
11//! process's mapped image for it at runtime.
12//!
13//! ## Quick start
14//!
15//! ```no_run
16//! use pe_sigscan::{find_in_text, Pattern};
17//!
18//! // Get a module base via your preferred means (GetModuleHandleW,
19//! // PEB walk, etc.). For demonstration we assume a known base.
20//! # let module_base = 0usize;
21//!
22//! // Build a pattern from an IDA-style hex string. `?` and `??` are
23//! // wildcards; whitespace between bytes is ignored.
24//! let pat = Pattern::from_ida("48 8B 05 ?? ?? ?? ?? 48 89 41 08").unwrap();
25//!
26//! if let Some(addr) = find_in_text(module_base, pat.as_slice()) {
27//!     println!("matched at {addr:#x}");
28//! }
29//! ```
30//!
31//! Or with the `pattern!` macro (no allocation, fully `const`-eligible):
32//!
33//! ```
34//! use pe_sigscan::pattern;
35//!
36//! const SIG: &[Option<u8>] = pattern![0x48, 0x8B, _, _, 0x48, 0x89];
37//! assert_eq!(SIG.len(), 6);
38//! assert_eq!(SIG[0], Some(0x48));
39//! assert_eq!(SIG[2], None);
40//! ```
41//!
42//! ## Two scanning modes
43//!
44//! - [`find_in_text`] / [`count_in_text`] / [`iter_in_text`] — walk only
45//!   the section literally named `.text`. The simplest case, suitable for
46//!   MSVC-built DLLs that put everything in one code section.
47//! - [`find_in_exec_sections`] / [`count_in_exec_sections`] /
48//!   [`iter_in_exec_sections`] — walk every section whose
49//!   `IMAGE_SCN_MEM_EXECUTE` characteristic is set. Required when the
50//!   function you're scanning for might live in a companion section like
51//!   `.text$mn`, `.textbss`, a jump-table arena, or any of the
52//!   optimized-layout code sections that some compilers and linkers emit.
53//!
54//! Both modes have [`find_in_slice`] / [`count_in_slice`] / [`iter_in_slice`]
55//! companions that work on a `&[u8]` instead of a loaded PE — useful for
56//! offline analysis, unit testing, and scanning extracted bytes.
57//!
58//! ## Resolving rel32 displacements
59//!
60//! Real signature workflows almost always end with "match the
61//! instruction, then follow its `rel32` displacement to the actual target
62//! address". The [`resolve_rel32`] / [`resolve_rel32_at`] helpers package
63//! that arithmetic so callers don't reinvent the off-by-one-prone
64//! `next_ip + disp32` calculation:
65//!
66//! ```no_run
67//! use pe_sigscan::{find_in_text, pattern, resolve_rel32_at};
68//! # let module_base = 0usize;
69//!
70//! // mov rax, [rip+disp32]: 48 8B 05 ?? ?? ?? ?? (7 bytes total).
71//! const SIG: &[Option<u8>] = pattern![0x48, 0x8B, 0x05, _, _, _, _];
72//! if let Some(addr) = find_in_text(module_base, SIG) {
73//!     let target = unsafe { resolve_rel32_at(addr, 3, 7) };
74//!     println!("global at {target:#x}");
75//! }
76//! ```
77//!
78//! ## Why direct memory reads?
79//!
80//! The `.text` section of a loaded DLL is page-aligned, RX-protected, and
81//! stays committed for the lifetime of the module. There is no TOCTOU
82//! concern; bytes don't change between reads. A typical scan walks tens of
83//! megabytes of bytes — routing every probe through `ReadProcessMemory`
84//! would cost tens of millions of syscalls (minutes of wall time). This
85//! crate reads directly via raw pointer dereference, bounded to PE-declared
86//! section ranges.
87//!
88//! ## Safety
89//!
90//! Public functions take a `module_base: usize` you must obtain from the OS
91//! (e.g. `GetModuleHandleW`). The implementation parses the PE headers at
92//! that base before any other access, so a non-PE pointer is rejected
93//! cleanly. Inside the validated section ranges, the unsafe pointer reads
//! are bounded by the `VirtualSize` field from the section header — short
//! of the loader handing us a malformed PE (which the loader itself would
//! have rejected), there is no path to an out-of-bounds read.
97//!
98//! The slice variants are safe by Rust's slice invariants and need no
99//! further trust from the caller.
100//!
101//! ## Platform
102//!
103//! Windows / PE only.
104//!
105//! The crate compiles on every platform — the parsing is pure compute —
106//! but the in-process function signatures assume a `module_base` that came
107//! from the Windows loader. On non-Windows targets, the slice variants
108//! still work for analysing PE bytes you have mapped manually.
109//!
110//! ## License
111//!
112//! MIT OR Apache-2.0.
113
114#![cfg_attr(not(any(feature = "std", test)), no_std)]
115#![warn(missing_docs)]
116#![warn(rust_2018_idioms)]
117#![warn(unreachable_pub)]
118#![allow(unsafe_op_in_unsafe_fn)]
119
120extern crate alloc;
121
122mod error;
123mod fastscan;
124mod instr;
125mod pattern;
126mod pe;
127mod scan;
128
129pub use crate::error::{ParseErrorKind, ParsePatternError};
130pub use crate::instr::{read_rel32, resolve_rel32, resolve_rel32_at};
131pub use crate::pattern::{Pattern, WildcardPattern};
132pub use crate::scan::{
133    count_in_exec_sections, count_in_slice, count_in_text, find_in_exec_sections, find_in_slice,
134    find_in_text, iter_in_exec_sections, iter_in_slice, iter_in_text, Matches, SliceMatches,
135};
136
137// Section-targeted scanners (feature `section-info`).
138//
139// `find_in_section` / `count_in_section` / `iter_in_section` live in
140// `scan.rs` alongside the always-available scanners; they're the
141// same shape as `find_in_text` etc. but take a section name as a
142// parameter, letting callers scan inside `.rdata`, `.pdata`,
143// `.text$mn`, etc. Internally they delegate to `crate::pe::find_section`.
144//
// Only these three scanner entry points are re-exported under the
// feature; the section-lookup helpers in `crate::pe` are not
// re-exported here and remain crate-internal.
148#[cfg(feature = "section-info")]
149pub use crate::scan::{count_in_section, find_in_section, iter_in_section};
150
151// `module_size` is a standalone reader for
152// `IMAGE_OPTIONAL_HEADER.SizeOfImage` — useful for cross-module
153// rel32 disambiguation (pairs naturally with the always-available
154// `resolve_rel32*` helpers). It is exported unconditionally.
155pub use crate::pe::module_size;
156
157// ---------------------------------------------------------------------------
158// pattern! macro
159// ---------------------------------------------------------------------------
160//
// `#[macro_export]` places a `macro_rules!` macro at the crate root no
// matter which module defines it, so it is always reachable as
// `pe_sigscan::pattern!`. We keep the definition here rather than in a
// `macros` submodule so the source location matches that public path.
165
/// Expand a comma-separated list of byte literals and `_` wildcards into a
/// `&'static [Option<u8>; N]` at compile time.
///
/// Every byte literal turns into `Some(byte)` and every `_` turns into
/// `None`. A trailing comma is accepted, and an empty invocation produces an
/// empty array.
///
/// # Examples
///
/// ```
/// use pe_sigscan::pattern;
///
/// // Fixed bytes are written as literals (0xNN); `_` marks a wildcard.
/// const SIG: &[Option<u8>] = pattern![0x48, 0x8B, _, _, 0x48, 0x89];
/// assert_eq!(SIG, &[Some(0x48), Some(0x8B), None, None, Some(0x48), Some(0x89)]);
/// ```
///
/// Reach for this macro when the signature is hard-coded in the source: it
/// needs no allocation and no runtime parsing. When the pattern only becomes
/// known at runtime (config, a dump file, user input), use
/// [`Pattern::from_ida`] instead.
#[macro_export]
macro_rules! pattern {
    ( $( $elem:tt ),* $(,)? ) => {
        // Each element is a single token tree; the hidden helper decides
        // whether it is a wildcard or a fixed byte.
        &[ $( $crate::__pattern_token!($elem) ),* ]
    };
}
190
/// Implementation detail of [`pattern!`]: maps one token to an `Option<u8>`
/// — `_` becomes `None`, a byte literal becomes `Some(literal)`.
///
/// It is exported only so that `pattern!` can reach it through `$crate`;
/// it is hidden from the docs and not meant to be invoked directly.
#[doc(hidden)]
#[macro_export]
macro_rules! __pattern_token {
    // Wildcard position.
    (_) => {
        ::core::option::Option::None::<u8>
    };
    // Fixed byte; the explicit `u8` keeps the element type unambiguous.
    ($fixed:literal) => {
        ::core::option::Option::Some::<u8>($fixed)
    };
}
204
205// ---------------------------------------------------------------------------
206// Crate-level integration tests
207// ---------------------------------------------------------------------------
208
#[cfg(test)]
mod tests {
    // Crate-root tests: they exercise the `pattern!` macro defined in this
    // file and smoke-test the re-exported error and `Pattern` types through
    // the public paths.
    use super::*;

    // -- pattern! macro --------------------------------------------------

    // Literal-only input: every element becomes `Some(byte)`.
    #[test]
    fn pattern_macro_no_wildcards() {
        const SIG: &[Option<u8>] = pattern![0x48, 0x8B, 0x05];
        assert_eq!(SIG, &[Some(0x48), Some(0x8B), Some(0x05)]);
    }

    // Mixed input: `_` positions become `None`, literals keep their place.
    #[test]
    fn pattern_macro_with_wildcards() {
        const SIG: &[Option<u8>] = pattern![0x48, _, 0x05, _, _];
        assert_eq!(SIG, &[Some(0x48), None, Some(0x05), None, None]);
    }

    #[test]
    fn pattern_macro_trailing_comma() {
        // `$(,)?` — trailing comma should compile.
        const SIG: &[Option<u8>] = pattern![0x48, 0x8B,];
        assert_eq!(SIG, &[Some(0x48), Some(0x8B)]);
    }

    #[test]
    fn pattern_macro_empty() {
        // Zero-token form should compile to an empty slice.
        const SIG: &[Option<u8>] = pattern![];
        assert!(SIG.is_empty());
    }

    // Smallest non-empty forms: one fixed byte, one wildcard.
    #[test]
    fn pattern_macro_single_byte() {
        const SIG: &[Option<u8>] = pattern![0xCC];
        assert_eq!(SIG, &[Some(0xCC)]);
    }

    #[test]
    fn pattern_macro_single_wildcard() {
        const SIG: &[Option<u8>] = pattern![_];
        assert_eq!(SIG, &[None]);
    }

    // -- error display ---------------------------------------------------

    // Each `Display` test checks only for a distinctive fragment of the
    // message, so the exact wording can evolve without breaking the test.

    #[test]
    fn error_display_empty() {
        let e = ParsePatternError {
            token_index: 0,
            kind: ParseErrorKind::Empty,
        };
        let s = alloc::format!("{e}");
        assert!(s.contains("no tokens"), "got: {s}");
    }

    #[test]
    fn error_display_invalid_length() {
        let e = ParsePatternError {
            token_index: 3,
            kind: ParseErrorKind::InvalidLength,
        };
        let s = alloc::format!("{e}");
        assert!(s.contains("token #3"), "got: {s}");
        assert!(s.contains("two hex digits"), "got: {s}");
    }

    #[test]
    fn error_display_invalid_hex_digit() {
        let e = ParsePatternError {
            token_index: 1,
            kind: ParseErrorKind::InvalidHexDigit,
        };
        let s = alloc::format!("{e}");
        assert!(s.contains("token #1"), "got: {s}");
        assert!(s.contains("non-hex"), "got: {s}");
    }

    // The explicit `.clone()` on a `Copy` value is the point of this test
    // (it exercises the `Clone` impl), so Clippy's `clone_on_copy` lint is
    // silenced here rather than "fixed".
    #[allow(clippy::clone_on_copy)]
    #[test]
    fn error_is_copy_and_clone() {
        let e = ParsePatternError {
            token_index: 0,
            kind: ParseErrorKind::Empty,
        };
        // Using `e` again after this move-like binding only compiles if the
        // type is `Copy`.
        let copied = e;
        let cloned = e.clone();
        assert_eq!(copied, e);
        assert_eq!(cloned, e);
    }

    #[test]
    fn error_kind_equality() {
        // Touch the `PartialEq` derive on every variant so coverage tools
        // see the discriminant comparisons exercised.
        assert_eq!(ParseErrorKind::Empty, ParseErrorKind::Empty);
        assert_eq!(ParseErrorKind::InvalidLength, ParseErrorKind::InvalidLength);
        assert_eq!(
            ParseErrorKind::InvalidHexDigit,
            ParseErrorKind::InvalidHexDigit
        );
        assert_ne!(ParseErrorKind::Empty, ParseErrorKind::InvalidLength);
        assert_ne!(ParseErrorKind::Empty, ParseErrorKind::InvalidHexDigit);
        assert_ne!(
            ParseErrorKind::InvalidLength,
            ParseErrorKind::InvalidHexDigit
        );
    }

    #[cfg(feature = "std")]
    #[test]
    fn error_implements_std_error() {
        // Existence proof: this only compiles if the trait impl exists.
        fn assert_error<E: std::error::Error>(_: &E) {}
        let e = ParsePatternError {
            token_index: 0,
            kind: ParseErrorKind::Empty,
        };
        assert_error(&e);
    }

    // -- Pattern API smoke -----------------------------------------------

    // A cloned `Pattern` must compare equal to its source.
    #[test]
    fn pattern_clone_and_eq() {
        let p1 = Pattern::from_ida("48 8B ?? 89").unwrap();
        let p2 = p1.clone();
        assert_eq!(p1, p2);
    }

    #[test]
    fn pattern_debug() {
        // Touch the `Debug` derive so coverage records it.
        let p = Pattern::from_ida("48").unwrap();
        let s = alloc::format!("{p:?}");
        assert!(s.contains("Pattern"), "got: {s}");
    }

    // `as_slice` exposes the parsed tokens: a hex byte as `Some`, `??` as
    // `None`.
    #[test]
    fn pattern_as_slice_round_trip() {
        let p = Pattern::from_ida("48 ??").unwrap();
        let s = p.as_slice();
        assert_eq!(s.len(), 2);
        assert_eq!(s[0], Some(0x48));
        assert_eq!(s[1], None);
    }
}