patterns/lib.rs
1//! # Pattern matching library
2//!
3//! Allows you to search for a pattern within data via an iterator interface.
4//! This library uses the core::simd abstraction and is fully no_std, no alloc.
5//! Additionally, all panics are documented and are limited to pattern creation.
6//!
7//! ## Usage
8//!
9//! ```rs
10//! use patterns::Pattern;
11//!
12//! let data = [0_u8; 1_000_00];
13//! // Allows . and ? as wildcard.
14//! // Any number of wildcard characters between spaces is considered a wildcard byte.
15//! let pattern: Pattern = "01 02 00 ? 59 ff".parse().unwrap();
16//! let mut iterator = pattern.matches(&data);
17//!
18//! for _found in iterator {
19//! // use _found
20//! }
21//! ```
22//!
23//! More advanced use cases may also specify a target alignment required to
24//! match, or the LANE size with which to search:
25//!
26//! ```rs
27//! use patterns::Pattern;
28//!
29//! static PATTERN: Pattern<4, 64> = Pattern::new("00 01 02 . ff");
30//! ```
31//!
32//! ## Limitations
33//!
//! - The maximum number of bytes supported inside a pattern is determined by
//!   the chosen 2nd const parameter (default 64)
//! - Target alignment of the pattern to search for must be less than or equal
//!   to that 2nd const parameter
//! - The pointer of data to search through must adhere to these bounds:
//!   - `data.as_ptr() - 64 > `[`usize::MIN`]
//!   - `data.as_ptr() + data.len() + 64 < `[`usize::MAX`]
41//!
42//! In practice, it's impossible to be outside of these bounds when using an OS.
43
44// todos
45// optimize pattern.len() <= alignment
46// explore getting rid of pattern.length
47
48#![feature(portable_simd)]
49#![cfg_attr(not(feature = "std"), no_std)]
50// untested on big endian
51#![cfg(target_endian = "little")]
52
53pub use crate::{
54 pattern::{ParsePatternError, Pattern},
55 scanner::Scanner,
56};
57
58mod const_utils;
59#[cfg(feature = "std")]
60pub(crate) mod dispatch;
61mod masks;
62mod pattern;
63mod scanner;
64
/// The type that holds one bit for each byte in `BYTES`.
///
/// A `u64` provides 64 bits, which matches the widest vector size declared in
/// this file (`V512`, 64 bytes).
type BytesMask = u64;
67
/// Width in bytes of a 128-bit vector register.
const V128: usize = 16;
/// Width in bytes of a 256-bit vector register.
const V256: usize = 32;
/// Width in bytes of a 512-bit vector register.
const V512: usize = 64;
/// Fallback width used when no known SIMD target feature is detected.
const VUNKNOWN: usize = V512;

/// Provides a constant optimizing `BYTES` (see [`Pattern`]) to the target
/// CPU's SIMD width. This is best-effort and defaults to the maximum
/// supported number of bytes when the target is not recognized.
///
/// Note that `BYTES` also determines the maximum pattern length, so a narrower
/// value here means shorter patterns when this constant is used as `BYTES`.
///
/// No benchmark has been performed comparing different values of `BYTES`
/// against the assumed optimal vector width of each platform.
pub const OPTIMAL_BYTES: usize = default_vector_target_width();

/// Selects a vector width in bytes from the compile-time target architecture
/// and enabled target features.
///
/// Returns one of `V128`, `V256` or `V512`. If no recognized architecture and
/// feature combination is enabled, `VUNKNOWN` is returned.
///
/// Both the 32-bit and the 64-bit name of an architecture family are checked
/// where the family has two names, so e.g. `x86_64` is treated like `x86`.
const fn default_vector_target_width() -> usize {
    if cfg!(all(
        any(target_arch = "arm", target_arch = "aarch64"),
        target_feature = "neon"
    )) {
        return V128;
    }
    // HVX in 128-byte mode is 1024 bits wide; both HVX modes map to `V512`,
    // the largest width declared here.
    if cfg!(all(
        target_arch = "hexagon",
        any(target_feature = "hvx-length128b", target_feature = "hvx")
    )) {
        return V512;
    }
    if cfg!(all(
        any(target_arch = "mips", target_arch = "mips64"),
        target_feature = "msa"
    )) {
        return V128;
    }
    if cfg!(all(
        any(target_arch = "powerpc", target_arch = "powerpc64"),
        any(target_feature = "vsx", target_feature = "altivec")
    )) {
        return V128;
    }
    if cfg!(all(
        any(target_arch = "riscv32", target_arch = "riscv64"),
        target_feature = "v"
    )) {
        return V128;
    }
    if cfg!(all(
        any(target_arch = "wasm32", target_arch = "wasm64"),
        target_feature = "simd128"
    )) {
        return V128;
    }
    if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
        // Check from widest to narrowest: wider feature sets imply the
        // narrower ones are available too.
        if cfg!(target_feature = "avx512f") {
            return V512;
        }
        if cfg!(target_feature = "avx2") {
            return V256;
        }
        if cfg!(target_feature = "sse2") {
            return V128;
        }
    }
    VUNKNOWN
}