Skip to main content

patterns/
lib.rs

1//! # Pattern matching library
2//!
3//! Allows you to search for a pattern within data via an iterator interface.
4//! This library uses the core::simd abstraction and is fully no_std, no alloc.
5//! Additionally, all panics are documented and are limited to pattern creation.
6//!
7//! ## Usage
8//!
9//! ```rs
10//! use patterns::Pattern;
11//!
12//! let data = [0_u8; 100_000];
13//! // Allows . and ? as wildcard.
14//! // Any number of wildcard characters between spaces is considered a wildcard byte.
15//! let pattern: Pattern = "01 02 00 ? 59 ff".parse().unwrap();
16//! let mut iterator = pattern.matches(&data);
17//!
18//! for _found in iterator {
19//!     // use _found
20//! }
21//! ```
22//!
23//! More advanced use cases may also specify a target alignment required to
24//! match, or the LANE size with which to search:
25//!
26//! ```rs
27//! use patterns::Pattern;
28//!
29//! static PATTERN: Pattern<4, 64> = Pattern::new("00 01 02 . ff");
30//! ```
31//!
32//! ## Limitations
33//!
34//! - The maximum number of bytes supported inside a pattern is determined by
35//!   the chosen 2nd const parameter (default 64)
36//! - Target alignment of the pattern to search for must be less than or equal
37//!   to that 2nd const parameter
38//! - The pointer of data to search through must adhere to these bounds:
39//!     - `data.as_ptr() - 64 > `[`usize::MIN`]
40//!     - `data.as_ptr() + data.len() + 64 < `[`usize::MAX`]
41//!
42//! In practice, it's impossible to be outside of these bounds when using an OS.
43
44// todos
45// optimize pattern.len() <= alignment
46// explore getting rid of pattern.length
47
48#![feature(portable_simd)]
49#![cfg_attr(not(feature = "std"), no_std)]
50// untested on big endian
51#![cfg(target_endian = "little")]
52
53pub use crate::{
54    pattern::{ParsePatternError, Pattern},
55    scanner::Scanner,
56};
57
58mod const_utils;
59#[cfg(feature = "std")]
60pub(crate) mod dispatch;
61mod masks;
62mod pattern;
63mod scanner;
64
/// Bitmask type carrying one bit for every byte of a `BYTES`-wide vector.
///
/// Its 64 bits cover the widest vector size defined in this file (64 bytes).
type BytesMask = u64;
67
/// Width in bytes of a 128-bit vector.
const V128: usize = 16;
/// Width in bytes of a 256-bit vector.
const V256: usize = 32;
/// Width in bytes of a 512-bit vector; the largest width used in this file.
const V512: usize = 64;
/// Width used when no known SIMD target feature is detected: the maximum.
const VUNKNOWN: usize = V512;

/// Provides a constant optimizing `BYTES` (see [`Pattern`]) to target cpu simd
/// width. This is a best-effort, defaulting to maximum supported bytes.
///
/// Note that `BYTES` also determines maximum pattern length.
///
/// No benchmark has been performed comparing different values of `BYTES` to
/// the assumed optimal platform target width.
pub const OPTIMAL_BYTES: usize = default_vector_target_width();

/// Picks a vector width in bytes from the compile-time target architecture and
/// its enabled target features.
///
/// Detection relies purely on `cfg!`, so only features enabled at compile time
/// (e.g. through `-C target-feature` or `-C target-cpu`) are taken into
/// account. The 32-bit and 64-bit flavours of an architecture are treated
/// alike. When nothing matches, [`VUNKNOWN`] is returned.
const fn default_vector_target_width() -> usize {
    if cfg!(all(
        any(target_arch = "arm", target_arch = "aarch64"),
        target_feature = "neon"
    )) {
        V128
    } else if cfg!(all(
        target_arch = "hexagon",
        any(target_feature = "hvx-length128b", target_feature = "hvx")
    )) {
        // The 128-byte (1024-bit) HVX mode exceeds the largest width defined
        // here, so both HVX modes map to V512.
        V512
    } else if cfg!(all(
        any(target_arch = "mips", target_arch = "mips64"),
        target_feature = "msa"
    )) {
        V128
    } else if cfg!(all(
        any(target_arch = "powerpc", target_arch = "powerpc64"),
        any(target_feature = "vsx", target_feature = "altivec")
    )) {
        V128
    } else if cfg!(all(
        any(target_arch = "riscv32", target_arch = "riscv64"),
        target_feature = "v"
    )) {
        V128
    } else if cfg!(all(
        any(target_arch = "wasm32", target_arch = "wasm64"),
        target_feature = "simd128"
    )) {
        V128
    } else if cfg!(any(target_arch = "x86", target_arch = "x86_64")) {
        // Widest available extension wins. `x86_64` must be matched explicitly:
        // `target_arch = "x86"` is only true for 32-bit targets.
        if cfg!(target_feature = "avx512f") {
            V512
        } else if cfg!(target_feature = "avx2") {
            V256
        } else if cfg!(target_feature = "sse2") {
            V128
        } else {
            VUNKNOWN
        }
    } else {
        VUNKNOWN
    }
}