1use core::{
2 marker::PhantomData,
3 num::IntErrorKind,
4 ops::Not,
5 simd::{Mask, Simd},
6 str::FromStr,
7};
8
9use crate::{const_utils, Scanner, VUNKNOWN as DEFAULT_BYTES};
10
11#[derive(Clone, Debug)]
20#[must_use]
21pub struct Pattern<const ALIGNMENT: usize = 1, const BYTES: usize = DEFAULT_BYTES> {
22 pub(crate) bytes: Simd<u8, BYTES>,
24 pub(crate) mask: Mask<i8, BYTES>,
26 pub(crate) first_bytes: Simd<u8, BYTES>,
31 pub(crate) first_bytes_mask: Mask<i8, BYTES>,
36 pub(crate) first_byte_offset: u8,
38 pub(crate) length: u8,
40 phantom: PhantomData<[u8; ALIGNMENT]>,
41}
42
43impl<const ALIGNMENT: usize, const BYTES: usize> Pattern<ALIGNMENT, BYTES> {
44 const _ALIGNED: () = {
45 if ALIGNMENT > BYTES {
46 panic!("Pattern ALIGNMENT must be less or equal to BYTES");
47 }
48 };
49
50 #[inline]
56 pub const fn new(pattern: &str) -> Self {
57 match Self::from_str(pattern) {
58 Ok(p) => p,
59 Err(ParsePatternError::PatternTooLong) => panic!("PatternTooLong"),
60 Err(ParsePatternError::InvalidHexNumber(..)) => panic!("InvalidHexNumber"),
61 Err(ParsePatternError::MissingNonWildcardByte) => panic!("MissingNonWildcardByte"),
62 }
63 }
64
65 pub fn from_slice(bytes: &[u8], mask: u64) -> Self {
73 #[expect(
74 path_statements,
75 reason = "This forces evaluation of the const and emits a compile time error."
76 )]
77 Self::_ALIGNED;
78 let mut input: [u8; BYTES] = [0; BYTES];
79 let length = bytes.len().min(BYTES);
80 input[..length].copy_from_slice(bytes);
81 let mask = u64::MAX.checked_shr(length as u32).unwrap_or(0).not() & mask;
82 let bytes = Simd::<u8, BYTES>::from_array(input);
83 let mask = Mask::<i8, BYTES>::from_bitmask(mask.reverse_bits());
84 let mask_array = mask.to_simd();
85 let mask_array = mask_array.as_array().as_slice();
86
87 let first_byte_offset = find_first_byte_offset::<ALIGNMENT>(mask_array).unwrap();
88
89 let (first_bytes, first_bytes_mask) = fill_first_bytes::<ALIGNMENT, BYTES>(
90 &input[first_byte_offset..],
91 &mask_array[first_byte_offset..],
92 );
93
94 Self {
95 bytes,
96 mask,
97 first_bytes,
98 first_bytes_mask,
99 first_byte_offset: first_byte_offset as _,
100 length: length as _,
101 phantom: PhantomData,
102 }
103 }
104
105 pub const fn from_str(s: &str) -> Result<Self, ParsePatternError> {
114 #[expect(
115 path_statements,
116 reason = "This forces evaluation of the const and emits a compile time error."
117 )]
118 Self::_ALIGNED;
119 let bytes = const_utils::SplitAsciiWhitespace::new(s);
120
121 let length = bytes.clone().count();
122 if length > BYTES {
123 return Err(ParsePatternError::PatternTooLong);
124 }
125
126 let (buffer, mask) = {
127 let mut buffer = [0_u8; BYTES];
128 let mut mask = [0_i8; BYTES];
129 let mut index = 0;
130 let mut bytes = bytes;
131
132 loop {
133 let byte;
134 (bytes, byte) = bytes.next();
135 let byte = match byte {
136 Some(b) => b,
137 None => break,
138 };
139
140 if !const_utils::is_wildcard(byte) {
141 let parsed = match const_utils::hex_to_u8(byte) {
142 Ok(parsed) => parsed,
143 Err(e) => return Err(ParsePatternError::InvalidHexNumber(e)),
144 };
145 buffer[index] = parsed;
146 mask[index] = -1;
147 }
148
149 index += 1;
150 }
151
152 (buffer, mask)
153 };
154
155 let first_byte_offset = match find_first_byte_offset::<ALIGNMENT>(&mask) {
156 Ok(offset) => offset,
157 Err(e) => return Err(e),
158 };
159
160 let (_, chunk) = buffer.split_at(first_byte_offset);
161 let (_, mask_chunk) = mask.split_at(first_byte_offset);
162 let (first_bytes, first_bytes_mask) =
163 fill_first_bytes::<ALIGNMENT, BYTES>(chunk, mask_chunk);
164
165 let mask = Simd::from_array(mask);
170 let mask = unsafe { *(&mask as *const _ as *const _) };
171
172 Ok(Self {
173 bytes: Simd::<u8, BYTES>::from_array(buffer),
174 mask,
175 first_bytes,
176 first_bytes_mask,
177 first_byte_offset: first_byte_offset as _,
178 length: length as _,
179 phantom: PhantomData,
180 })
181 }
182
183 #[inline]
185 pub fn matches<'pattern, 'data>(
186 &'pattern self,
187 data: &'data [u8],
188 ) -> Scanner<'pattern, 'data, ALIGNMENT, BYTES> {
189 Scanner::new(self, data)
190 }
191}
192
193const fn find_first_byte_offset<const ALIGNMENT: usize>(
194 mut mask: &[i8],
195) -> Result<usize, ParsePatternError> {
196 let mut i = 0;
197 let mut smallest = 0;
198 let mut highest_count = 0;
199 loop {
200 if mask.len() < ALIGNMENT {
201 break;
202 }
203 let chunk;
204 (chunk, mask) = mask.split_at(ALIGNMENT);
205
206 let mut j = 0;
207 let mut count = 0;
208 while j < chunk.len() {
209 count += (chunk[j] != 0) as usize;
210 j += 1;
211 }
212
213 let chunk_count = count;
214 if chunk_count > highest_count {
215 highest_count = chunk_count;
216 smallest = i;
217 }
218
219 i += 1;
220 }
221
222 if highest_count == 0 {
223 Err(ParsePatternError::MissingNonWildcardByte)
224 } else {
225 Ok(smallest * ALIGNMENT)
226 }
227}
228
229const fn fill_first_bytes<const ALIGNMENT: usize, const BYTES: usize>(
230 chunk: &[u8],
231 mask: &[i8],
232) -> (Simd<u8, BYTES>, Mask<i8, BYTES>) {
233 let mut first = [0u8; BYTES];
234 let mut first_mask = [0i8; BYTES];
235
236 let mut i = 0;
237
238 while i < BYTES / ALIGNMENT {
239 let mut j = 0;
240 while j < ALIGNMENT {
241 first[i * ALIGNMENT + j] = chunk[j];
242 first_mask[i * ALIGNMENT + j] = !mask[j];
243 j += 1;
244 }
245 i += 1;
246 }
247
248 let bytes = Simd::from_array(first);
249 let mask = Simd::from_array(first_mask);
254 let mask = unsafe { *(&mask as *const _ as *const _) };
255
256 (bytes, mask)
257}
258
259impl<const ALIGNMENT: usize, const BYTES: usize> FromStr for Pattern<ALIGNMENT, BYTES> {
260 type Err = ParsePatternError;
261
262 #[inline]
263 fn from_str(s: &str) -> Result<Self, Self::Err> {
264 Self::from_str(s)
265 }
266}
267
268#[derive(Debug, Clone, PartialEq, Eq)]
269#[non_exhaustive]
270pub enum ParsePatternError {
271 PatternTooLong,
272 InvalidHexNumber(IntErrorKind),
273 MissingNonWildcardByte,
274}
275
276impl From<IntErrorKind> for ParsePatternError {
277 #[inline]
278 fn from(value: IntErrorKind) -> Self {
279 Self::InvalidHexNumber(value)
280 }
281}