rsonpath/classification/simd.rs
1//! SIMD configuration and runtime dispatch.
2//!
3//! The core of our approach is the two macros: [`config_simd`] and [`dispatch_simd`].
4//!
5//! ## What?
6//!
7//! We need to strike a delicate balance between portable code and compiler optimizations.
8//! All SIMD code should be maximally inlined. To that end we need `target_feature` annotated
9//! functions, so that SIMD intrinsics are actually emitted, but these are hard barriers for
10//! inlining if called from non-`target_feature` functions.
11//!
12//! The ideal would be to have a single `target_feature`-annotated entry point and then force
13//! the compiler to inline everything there. This isn't that easy, because you cannot *really*
14//! force the compiler to inline everything, and even if you could that can lead to inefficient compilation
15//! (large functions are harder to optimize, large code size negatively impacts caching, etc.).
16//!
17//! On the other end of portability there's runtime checking of CPU capabilities.
18//! That introduces an overhead and cannot be used everywhere, so in any case it'd have to be added
19//! in specific places that then call `target_feature`-annotated functions.
20//!
21//! The [`multiversion`](https://calebzulawski.github.io/rust-simd-book/3.3-multiversion.html) crate
22//! provides a near-ideal tradeoff, where one can annotate a function such that multiple definitions of it
23//! are expanded with different `target_feature` sets, and an efficient, cached runtime check is performed
24//! at entry to that function.
25//!
26//! For our crate we can do slightly better. The idea is to do the entire configuration of SIMD once and
27//! upfront ([`configure`] producing [`SimdConfiguration`]) and save it. Then we can use [`config_simd`]
28//! to create a [`Simd`] implementation that encapsulates all the relevant information in its type arguments.
29//! An entry point function can take a generic `V: Simd` parameter so that the compiler specializes it
30//! for all supported CPU capability configurations. Finally, to get the correct `target_feature` annotation
31//! we do a similar thing to `multiversion`, but using a constant value from the `V` type, which allows
32//! the compiler to optimize the check away when monomorphizing the function.
33//!
34//! An example idiomatic usage would be:
35//!
36//! ```rust,ignore
37//! fn entry() -> Result<(), EngineError> {
38//! let configuration = simd::configure();
39//! config_simd!(configuration => |simd| {
40//! run(simd)
41//! })
42//! }
43//!
44//! fn run<V: Simd>(simd: V) -> Result<(), EngineError> {
//!     dispatch_simd!(simd; simd =>
//!     fn<V: Simd>(simd: V) -> Result<(), EngineError>
//!     {
//!         // Actual implementation using SIMD capabilities from `simd`.
//!     })
51//! }
52//! ```
53//!
54//! Assume for a second we only have 3 SIMD combinations:
55//! - `+avx2,+pclmulqdq,+popcnt`
56//! - `+sse2,+popcnt`
57//! - `nosimd`
58//!
59//! The above code gets expanded to (approximately):
60//!
61//! ```rust,ignore
62//! fn entry() -> Result<(), EngineError> {
63//! let configuration = simd::configure();
64//! {
65//! match configuration.highest_simd() {
66//! SimdTag::Avx2 => {
67//! let simd = ResolvedSimd::<
68//! quotes::avx2_64::Constructor,
69//! structural::avx2_64::Constructor,
70//! depth::avx2_64::Constructor,
71//! memmem::avx2_64::Constructor,
72//! simd::AVX2_PCLMULQDQ_POPCNT,
73//! >::new();
74//! run(simd)
75//! },
//!             SimdTag::Sse2 if configuration.fast_popcnt() => {
77//! let simd = ResolvedSimd::<
78//! quotes::nosimd::Constructor,
79//! structural::nosimd::Constructor,
80//! depth::sse2_64::Constructor,
81//! memmem::sse2_64::Constructor,
82//! simd::SSE2_POPCNT,
83//! >::new();
84//! run(simd)
85//! },
86//! _ => {
87//! let simd = ResolvedSimd::<
88//! quotes::nosimd::Constructor,
89//! structural::nosimd::Constructor,
90//! depth::nosimd::Constructor,
91//! memmem::nosimd::Constructor,
92//! simd::NOSIMD,
93//! >::new();
94//! run(simd)
95//! },
96//! }
97//! }
98//! }
99//!
100//! fn run<V: Simd>(simd: V) -> Result<(), EngineError> {
101//! #[target_feature(enable = "avx2")]
102//! #[target_feature(enable = "pclmulqdq")]
103//! #[target_feature(enable = "popcnt")]
104//! unsafe fn avx2_pclmulqdq_popcnt<V: Simd>(simd: V) -> Result<(), EngineError> {
105//! // Actual implementation using SIMD capabilities from `simd`.
106//! }
107//! #[target_feature(enable = "sse2")]
108//! #[target_feature(enable = "popcnt")]
109//! unsafe fn sse2_popcnt<V: Simd>(simd: V) -> Result<(), EngineError> {
110//! // Actual implementation using SIMD capabilities from `simd`.
111//! }
//!     fn nosimd<V: Simd>(simd: V) -> Result<(), EngineError> {
113//! // Actual implementation using SIMD capabilities from `simd`.
114//! }
115//!
116//! // SAFETY: depends on the provided SimdConfig, which cannot be incorrectly constructed.
117//! unsafe {
118//! match simd.dispatch_tag() {
119//! simd::AVX2_PCLMULQDQ_POPCNT => avx2_pclmulqdq_popcnt(simd),
120//! simd::SSE2_POPCNT => sse2_popcnt(simd),
121//! _ => nosimd(simd),
122//! }
123//! }
124//! }
125//! ```
126//!
127//! Now because all of the logic in the `dispatch_simd` is done over the `V` type constants,
128//! the compiler will produce a `run` function for the three possible `ResolvedSimd` concrete
129//! types used and then constant-fold the body to produce code equivalent to this (not valid Rust code):
130//!
131//! ```rust,ignore
132//! fn run(simd: ResolvedSimd::<
133//! quotes::avx2_64::Constructor,
134//! structural::avx2_64::Constructor,
135//! depth::avx2_64::Constructor,
136//! memmem::avx2_64::Constructor,
137//! simd::AVX2_PCLMULQDQ_POPCNT,
138//! >) -> Result<(), EngineError> {
139//! #[target_feature(enable = "avx2")]
140//! #[target_feature(enable = "pclmulqdq")]
141//! #[target_feature(enable = "popcnt")]
142//! unsafe fn avx2_pclmulqdq_popcnt(simd: Avx2Simd = ResolvedSimd::<
143//! quotes::avx2_64::Constructor,
144//! structural::avx2_64::Constructor,
145//! depth::avx2_64::Constructor,
146//! memmem::avx2_64::Constructor,
147//! simd::AVX2_PCLMULQDQ_POPCNT,
148//! >) -> Result<(), EngineError> {
149//! // Actual implementation using SIMD capabilities from `simd`.
150//! }
151//!
152//! unsafe { avx2_pclmulqdq_popcnt(simd) }
153//! }
154//!
155//! fn run(simd: ResolvedSimd::<
156//! quotes::nosimd::Constructor,
157//! structural::nosimd::Constructor,
158//! depth::sse2_64::Constructor,
159//! memmem::sse2_64::Constructor,
160//! simd::SSE2_POPCNT,
161//! >) -> Result<(), EngineError> {
162//! #[target_feature(enable = "sse2")]
163//! #[target_feature(enable = "popcnt")]
164//! unsafe fn sse2_popcnt(simd: ResolvedSimd::<
165//! quotes::nosimd::Constructor,
166//! structural::nosimd::Constructor,
167//! depth::sse2_64::Constructor,
168//! memmem::sse2_64::Constructor,
169//! simd::SSE2_POPCNT,
170//! >) -> Result<(), EngineError> {
171//! // Actual implementation using SIMD capabilities from `simd`.
172//! }
173//!
174//! unsafe { sse2_popcnt(simd) }
175//! }
176//!
177//! fn run(simd: ResolvedSimd::<
178//! quotes::nosimd::Constructor,
179//! structural::nosimd::Constructor,
180//! depth::nosimd::Constructor,
181//! memmem::nosimd::Constructor,
182//! simd::NOSIMD,
183//! >) -> Result<(), EngineError> {
184//! unsafe fn nosimd(simd: ResolvedSimd::<
185//! quotes::nosimd::Constructor,
186//! structural::nosimd::Constructor,
187//! depth::nosimd::Constructor,
188//! memmem::nosimd::Constructor,
189//! simd::NOSIMD,
190//! >) -> Result<(), EngineError> {
191//! // Actual implementation using SIMD capabilities from `simd`.
192//! }
193//!
194//! unsafe { nosimd(simd) }
195//! }
196//! ```
197//!
198//! The compiler is then free to optimize the inner functions fully, and the entire dispatch
199//! happens once when `entry` is called.
200//!
201//! The config dispatch is done at start of the engine in one of the functions that run the executor.
202//! The simd dispatch is put into the big entry points of the executor logic - `run_on_subtree`,
203//! `run_head_skipping`, and `run_tail_skipping`. These are generally big enough to not be inlined by the compiler,
204//! and long-running enough for that to not be an issue.
205use super::{
206 depth::{DepthImpl, DepthIterator, DepthIteratorResumeOutcome},
207 memmem::{Memmem, MemmemImpl},
208 quotes::{InnerIter, QuoteClassifiedIterator, QuotesImpl, ResumedQuoteClassifier},
209 structural::{BracketType, StructuralImpl, StructuralIterator},
210 ResumeClassifierState,
211};
212use crate::{
213 input::{Input, InputBlockIterator},
214 result::InputRecorder,
215 MaskType, BLOCK_SIZE,
216};
217use cfg_if::cfg_if;
218use std::{fmt::Display, marker::PhantomData};
219
220/// All SIMD capabilities of the engine and classifier types.
221pub(crate) trait Simd: Copy {
222 /// The implementation of [`QuoteClassifiedIterator`] of this SIMD configuration.
223 type QuotesClassifier<'i, I>: QuoteClassifiedIterator<'i, I, MaskType, BLOCK_SIZE> + InnerIter<I>
224 where
225 I: InputBlockIterator<'i, BLOCK_SIZE>;
226
227 /// The implementation of [`StructuralIterator`] of this SIMD configuration.
228 type StructuralClassifier<'i, I>: StructuralIterator<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>
229 where
230 I: InputBlockIterator<'i, BLOCK_SIZE>;
231
232 /// The implementation of [`DepthIterator`] of this SIMD configuration.
233 type DepthClassifier<'i, I>: DepthIterator<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>
234 where
235 I: InputBlockIterator<'i, BLOCK_SIZE>;
236
237 /// The implementation of [`Memmem`] of this SIMD configuration.
238 type MemmemClassifier<'i, 'b, 'r, I, R>: Memmem<'i, 'b, 'r, I, BLOCK_SIZE>
239 where
240 I: Input + 'i,
241 <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
242 R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
243 'i: 'r;
244
245 /// Get a unique descriptor of the enabled SIMD capabilities.
246 ///
247 /// The value should correspond to the `const`s defined in [`simd`](`self`),
248 /// like [`AVX2_PCLMULQDQ_POPCNT`] or [`NOSIMD`].
249 #[must_use]
250 #[allow(dead_code)] // Not used in targets that have only one possible tag (NOSIMD for non-x86 for example).
251 fn dispatch_tag(self) -> usize;
252
253 /// Walk through the JSON document given by the `iter` and classify quoted sequences.
254 #[must_use]
255 fn classify_quoted_sequences<'i, I>(self, iter: I) -> Self::QuotesClassifier<'i, I>
256 where
257 I: InputBlockIterator<'i, BLOCK_SIZE>;
258
259 /// Resume quote classification from an `iter` and, optionally, an already read
260 /// block that will be used as the first block to classify.
261 #[must_use]
262 fn resume_quote_classification<'i, I>(
263 self,
264 iter: I,
265 first_block: Option<I::Block>,
266 ) -> ResumedQuoteClassifier<Self::QuotesClassifier<'i, I>, I::Block, MaskType, BLOCK_SIZE>
267 where
268 I: InputBlockIterator<'i, BLOCK_SIZE>;
269
270 /// Walk through the JSON document quote-classified by `iter` and iterate over all
271 /// occurrences of structural characters in it.
272 #[must_use]
273 fn classify_structural_characters<'i, I>(
274 self,
275 iter: Self::QuotesClassifier<'i, I>,
276 ) -> Self::StructuralClassifier<'i, I>
277 where
278 I: InputBlockIterator<'i, BLOCK_SIZE>;
279
280 /// Resume classification using a state retrieved from a previously
281 /// used classifier via the [`stop`](StructuralIterator::stop) function.
282 #[must_use]
283 fn resume_structural_classification<'i, I>(
284 self,
285 state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
286 ) -> Self::StructuralClassifier<'i, I>
287 where
288 I: InputBlockIterator<'i, BLOCK_SIZE>;
289
290 /// Resume classification using a state retrieved from a previously
291 /// used classifier via the [`stop`](DepthIterator::stop) function.
292 #[must_use]
293 fn resume_depth_classification<'i, I>(
294 self,
295 state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
296 opening: BracketType,
297 ) -> DepthIteratorResumeOutcome<
298 'i,
299 I,
300 Self::QuotesClassifier<'i, I>,
301 Self::DepthClassifier<'i, I>,
302 MaskType,
303 BLOCK_SIZE,
304 >
305 where
306 I: InputBlockIterator<'i, BLOCK_SIZE>;
307
308 /// Create a classifier that can look for occurrences of a key in the `iter`.
309 #[must_use]
310 fn memmem<'i, 'b, 'r, I, R>(
311 self,
312 input: &'i I,
313 iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
314 ) -> Self::MemmemClassifier<'i, 'b, 'r, I, R>
315 where
316 I: Input,
317 R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>>,
318 'i: 'r;
319}
320
/// A SIMD configuration fully described by its type arguments.
///
/// `Q`, `S`, `D` and `M` are the quote, structural, depth and memmem
/// constructor types, and `TARGET` is the dispatch tag. The struct stores only
/// a [`PhantomData`] marker, so values are zero-sized.
pub(crate) struct ResolvedSimd<Q, S, D, M, const TARGET: usize> {
    phantom: PhantomData<(Q, S, D, M)>,
}

impl<Q, S, D, M, const TARGET: usize> ResolvedSimd<Q, S, D, M, TARGET> {
    /// Create the marker value for this combination of type arguments.
    pub(crate) fn new() -> Self {
        let phantom = PhantomData;
        Self { phantom }
    }
}

// `Copy` and `Clone` are implemented by hand because deriving them would add
// `Clone`/`Copy` bounds on `Q`, `S`, `D` and `M`, which are only markers here.
impl<Q, S, D, M, const TARGET: usize> Copy for ResolvedSimd<Q, S, D, M, TARGET> {}

impl<Q, S, D, M, const TARGET: usize> Clone for ResolvedSimd<Q, S, D, M, TARGET> {
    fn clone(&self) -> Self {
        *self
    }
}
338
339impl<Q, S, D, M, const TARGET: usize> Simd for ResolvedSimd<Q, S, D, M, TARGET>
340where
341 Q: QuotesImpl,
342 S: StructuralImpl,
343 D: DepthImpl,
344 M: MemmemImpl,
345{
346 type QuotesClassifier<'i, I>
347 = Q::Classifier<'i, I>
348 where
349 I: InputBlockIterator<'i, BLOCK_SIZE>;
350
351 type StructuralClassifier<'i, I>
352 = S::Classifier<'i, I, Self::QuotesClassifier<'i, I>>
353 where
354 I: InputBlockIterator<'i, BLOCK_SIZE>;
355
356 type DepthClassifier<'i, I>
357 = D::Classifier<'i, I, Self::QuotesClassifier<'i, I>>
358 where
359 I: InputBlockIterator<'i, BLOCK_SIZE>;
360
361 type MemmemClassifier<'i, 'b, 'r, I, R>
362 = M::Classifier<'i, 'b, 'r, I, R>
363 where
364 I: Input + 'i,
365 <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
366 R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
367 'i: 'r;
368
369 #[inline(always)]
370 fn dispatch_tag(self) -> usize {
371 TARGET
372 }
373
374 #[inline(always)]
375 fn classify_quoted_sequences<'i, I>(self, iter: I) -> Self::QuotesClassifier<'i, I>
376 where
377 I: InputBlockIterator<'i, BLOCK_SIZE>,
378 {
379 Q::new(iter)
380 }
381
382 #[inline(always)]
383 fn resume_quote_classification<'i, I>(
384 self,
385 iter: I,
386 first_block: Option<I::Block>,
387 ) -> ResumedQuoteClassifier<Self::QuotesClassifier<'i, I>, I::Block, MaskType, BLOCK_SIZE>
388 where
389 I: InputBlockIterator<'i, BLOCK_SIZE>,
390 {
391 Q::resume(iter, first_block)
392 }
393
394 #[inline(always)]
395 fn classify_structural_characters<'i, I>(
396 self,
397 iter: Self::QuotesClassifier<'i, I>,
398 ) -> Self::StructuralClassifier<'i, I>
399 where
400 I: InputBlockIterator<'i, BLOCK_SIZE>,
401 {
402 S::new(iter)
403 }
404
405 #[inline(always)]
406 fn resume_structural_classification<'i, I>(
407 self,
408 state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
409 ) -> Self::StructuralClassifier<'i, I>
410 where
411 I: InputBlockIterator<'i, BLOCK_SIZE>,
412 {
413 S::resume(state)
414 }
415
416 #[inline(always)]
417 fn resume_depth_classification<'i, I>(
418 self,
419 state: ResumeClassifierState<'i, I, Self::QuotesClassifier<'i, I>, MaskType, BLOCK_SIZE>,
420 opening: BracketType,
421 ) -> DepthIteratorResumeOutcome<
422 'i,
423 I,
424 Self::QuotesClassifier<'i, I>,
425 Self::DepthClassifier<'i, I>,
426 MaskType,
427 BLOCK_SIZE,
428 >
429 where
430 I: InputBlockIterator<'i, BLOCK_SIZE>,
431 {
432 D::resume(state, opening)
433 }
434
435 #[inline(always)]
436 fn memmem<'i, 'b, 'r, I, R>(
437 self,
438 input: &'i I,
439 iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
440 ) -> Self::MemmemClassifier<'i, 'b, 'r, I, R>
441 where
442 I: Input,
443 R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>>,
444 'i: 'r,
445 {
446 M::memmem(input, iter)
447 }
448}
449
/// SIMD extension recognized by rsonpath.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(crate) enum SimdTag {
    /// No SIMD capabilities detected.
    Nosimd,
    /// SSE2 detected.
    Sse2,
    /// SSSE3 detected.
    Ssse3,
    /// AVX2 detected.
    Avx2,
}

/// Runtime-detected SIMD configuration guiding how to construct a [`Simd`] implementation for the engine.
#[derive(Debug, Clone, Copy)]
pub(crate) struct SimdConfiguration {
    /// The most capable SIMD extension to use.
    highest_simd: SimdTag,
    /// Whether the `fast_quotes` optimization is enabled.
    fast_quotes: bool,
    /// Whether the `fast_popcnt` optimization is enabled.
    fast_popcnt: bool,
}

/// Name of the env variable that can be used to force a given [`SimdConfiguration`] to be used.
pub(crate) const SIMD_OVERRIDE_ENV_VARIABLE: &str = "RSONPATH_UNSAFE_FORCE_SIMD";

impl SimdConfiguration {
    /// The most capable SIMD extension of this configuration.
    pub(crate) fn highest_simd(&self) -> SimdTag {
        self.highest_simd
    }

    /// Whether the `fast_quotes` optimization is enabled.
    pub(crate) fn fast_quotes(&self) -> bool {
        self.fast_quotes
    }

    /// Whether the `fast_popcnt` optimization is enabled.
    pub(crate) fn fast_popcnt(&self) -> bool {
        self.fast_popcnt
    }

    /// Parse a configuration from its textual form `<simd>;<quotes>;<popcnt>`.
    ///
    /// - `<simd>` is one of `nosimd`, `sse2`, `ssse3`, `avx2`;
    /// - `<quotes>` is `fast_quotes` or `slow_quotes`;
    /// - `<popcnt>` is `fast_popcnt` or `slow_popcnt`.
    ///
    /// Matching is ASCII case-insensitive. Returns `None` unless the input has
    /// exactly three `;`-separated parts and every part is a known keyword.
    /// Surrounding whitespace is not trimmed.
    fn try_parse(value: &str) -> Option<Self> {
        /// Map `part` to `true`/`false` when it equals `on`/`off`, ignoring ASCII case.
        fn parse_flag(part: &str, on: &str, off: &str) -> Option<bool> {
            if part.eq_ignore_ascii_case(on) {
                Some(true)
            } else if part.eq_ignore_ascii_case(off) {
                Some(false)
            } else {
                None
            }
        }

        // Walk the split iterator directly; no intermediate `Vec` is needed.
        let mut parts = value.split(';');
        let simd_slug = parts.next()?;
        let quotes_str = parts.next()?;
        let popcnt_str = parts.next()?;
        if parts.next().is_some() {
            // More than three parts.
            return None;
        }

        // All keywords are lowercase ASCII, so comparing with
        // `eq_ignore_ascii_case` is the same as lowercasing the input first,
        // without allocating a new `String`.
        let highest_simd = if simd_slug.eq_ignore_ascii_case("nosimd") {
            SimdTag::Nosimd
        } else if simd_slug.eq_ignore_ascii_case("sse2") {
            SimdTag::Sse2
        } else if simd_slug.eq_ignore_ascii_case("ssse3") {
            SimdTag::Ssse3
        } else if simd_slug.eq_ignore_ascii_case("avx2") {
            SimdTag::Avx2
        } else {
            return None;
        };
        let fast_quotes = parse_flag(quotes_str, "fast_quotes", "slow_quotes")?;
        let fast_popcnt = parse_flag(popcnt_str, "fast_popcnt", "slow_popcnt")?;

        Some(Self {
            highest_simd,
            fast_quotes,
            fast_popcnt,
        })
    }
}
523
524/// Detect available SIMD features and return the best possible [`SimdConfiguration`]
525/// for the current system.
526///
527/// # Safety
528/// If the [`SIMD_OVERRIDE_ENV_VARIABLE`] env variable is defined, it MUST be a valid SIMD
529/// configuration for the current system. Otherwise, undefined behavior will follow.
530/// For example, setting the value to enable AVX2 on a platform without AVX2 is unsound.
531///
532/// # Panics
533/// If the [`SIMD_OVERRIDE_ENV_VARIABLE`] env variable is defined and does not contain a valid
534/// SIMD configuration, an immediate panic is raised.
535#[inline]
536#[must_use]
537pub(crate) fn configure() -> SimdConfiguration {
538 if let Ok(simd) = std::env::var(SIMD_OVERRIDE_ENV_VARIABLE) {
539 #[allow(clippy::expect_used)] // This is already an unsafe override, not expected to be used by users.
540 return SimdConfiguration::try_parse(&simd).expect("invalid simd configuration override");
541 }
542
543 cfg_if! {
544 if #[cfg(not(feature = "simd"))]
545 {
546 let highest_simd = SimdTag::Nosimd;
547 let fast_quotes = false;
548 let fast_popcnt = false;
549 }
550 else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
551 {
552 let highest_simd = if is_x86_feature_detected!("avx2") {
553 SimdTag::Avx2
554 } else if is_x86_feature_detected!("ssse3") {
555 SimdTag::Ssse3
556 } else if is_x86_feature_detected!("sse2") {
557 SimdTag::Sse2
558 } else {
559 SimdTag::Nosimd
560 };
561
562 let fast_quotes = is_x86_feature_detected!("pclmulqdq");
563 let fast_popcnt = is_x86_feature_detected!("popcnt");
564 }
565 else
566 {
567 let highest_simd = SimdTag::Nosimd;
568 let fast_quotes = false;
569 let fast_popcnt = false;
570 }
571 }
572
573 SimdConfiguration {
574 highest_simd,
575 fast_quotes,
576 fast_popcnt,
577 }
578}
579
580impl Display for SimdConfiguration {
581 #[inline]
582 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
583 let simd_slug = match self.highest_simd {
584 SimdTag::Nosimd => "nosimd",
585 SimdTag::Sse2 => "sse2",
586 SimdTag::Ssse3 => "ssse3",
587 SimdTag::Avx2 => "avx2",
588 };
589 let quote_desc = if self.fast_quotes { "fast_quotes" } else { "slow_quotes" };
590 let popcnt_desc = if self.fast_popcnt { "fast_popcnt" } else { "slow_popcnt" };
591
592 write!(f, "{simd_slug};{quote_desc};{popcnt_desc}")
593 }
594}
595
596pub(crate) const NOSIMD: usize = 0;
597
598cfg_if! {
599 if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] {
600 pub(crate) const AVX2_PCLMULQDQ_POPCNT: usize = 1;
601 pub(crate) const SSSE3_PCLMULQDQ_POPCNT: usize = 2;
602 pub(crate) const SSSE3_PCLMULQDQ: usize = 3;
603 pub(crate) const SSSE3_POPCNT: usize = 4;
604 pub(crate) const SSSE3: usize = 5;
605 pub(crate) const SSE2_PCLMULQDQ_POPCNT: usize = 6;
606 pub(crate) const SSE2_PCLMULQDQ: usize = 7;
607 pub(crate) const SSE2_POPCNT: usize = 8;
608 pub(crate) const SSE2: usize = 9;
609
610 macro_rules! dispatch_simd {
611 ($simd:expr; $( $arg:expr ),* => fn $( $fn:tt )*) => {{
612 #[target_feature(enable = "avx2")]
613 #[target_feature(enable = "pclmulqdq")]
614 #[target_feature(enable = "popcnt")]
615 unsafe fn avx2_pclmulqdq_popcnt $($fn)*
616 #[target_feature(enable = "ssse3")]
617 #[target_feature(enable = "pclmulqdq")]
618 #[target_feature(enable = "popcnt")]
619 unsafe fn ssse3_pclmulqdq_popcnt $($fn)*
620 #[target_feature(enable = "ssse3")]
621 #[target_feature(enable = "pclmulqdq")]
622 unsafe fn ssse3_pclmulqdq $($fn)*
623 #[target_feature(enable = "ssse3")]
624 #[target_feature(enable = "popcnt")]
625 unsafe fn ssse3_popcnt $($fn)*
626 #[target_feature(enable = "ssse3")]
627 unsafe fn ssse3 $($fn)*
628 #[target_feature(enable = "sse2")]
629 #[target_feature(enable = "pclmulqdq")]
630 #[target_feature(enable = "popcnt")]
631 unsafe fn sse2_pclmulqdq_popcnt $($fn)*
632 #[target_feature(enable = "sse2")]
633 #[target_feature(enable = "pclmulqdq")]
634 unsafe fn sse2_pclmulqdq $($fn)*
635 #[target_feature(enable = "sse2")]
636 #[target_feature(enable = "popcnt")]
637 unsafe fn sse2_popcnt $($fn)*
638 #[target_feature(enable = "sse2")]
639 unsafe fn sse2 $($fn)*
640 fn nosimd $($fn)*
641
642 let simd = $simd;
643
644 // SAFETY: depends on the provided SimdConfig, which cannot be incorrectly constructed.
645 unsafe {
646 match simd.dispatch_tag() {
647 $crate::classification::simd::AVX2_PCLMULQDQ_POPCNT => avx2_pclmulqdq_popcnt($($arg),*),
648 $crate::classification::simd::SSSE3_PCLMULQDQ_POPCNT => ssse3_pclmulqdq_popcnt($($arg),*),
649 $crate::classification::simd::SSSE3_PCLMULQDQ => ssse3_pclmulqdq($($arg),*),
650 $crate::classification::simd::SSSE3_POPCNT => ssse3_popcnt($($arg),*),
651 $crate::classification::simd::SSSE3 => ssse3($($arg),*),
652 $crate::classification::simd::SSE2_PCLMULQDQ_POPCNT => sse2_pclmulqdq_popcnt($($arg),*),
653 $crate::classification::simd::SSE2_PCLMULQDQ => sse2_pclmulqdq($($arg),*),
654 $crate::classification::simd::SSE2_POPCNT => sse2_popcnt($($arg),*),
655 $crate::classification::simd::SSE2 => sse2($($arg),*),
656 _ => nosimd($($arg),*),
657 }
658 }
659 }};
660 }
661 }
662 else {
663 macro_rules! dispatch_simd {
664 ($simd:expr; $( $arg:expr ),* => fn $( $fn:tt )*) => {{
665 fn nosimd $($fn)*
666 nosimd($($arg),*)
667 }};
668 }
669 }
670}
671
672cfg_if! {
673 if #[cfg(target_arch = "x86_64")] {
674 macro_rules! config_simd {
675 ($conf:expr => |$simd:ident| $b:block) => {
676 {
677 let conf = $conf;
678
679 match conf.highest_simd() {
680 // AVX2 implies all other optimizations.
681 $crate::classification::simd::SimdTag::Avx2 => {
682 assert!(conf.fast_quotes());
683 assert!(conf.fast_popcnt());
684 let $simd = $crate::classification::simd::ResolvedSimd::<
685 $crate::classification::quotes::avx2_64::Constructor,
686 $crate::classification::structural::avx2_64::Constructor,
687 $crate::classification::depth::avx2_64::Constructor,
688 $crate::classification::memmem::avx2_64::Constructor,
689 {$crate::classification::simd::AVX2_PCLMULQDQ_POPCNT},
690 >::new();
691 $b
692 }
693 $crate::classification::simd::SimdTag::Ssse3 => {
694 // In SSSE3 we need to check both advanced optimizations.
695 match (conf.fast_quotes(), conf.fast_popcnt()) {
696 (true, true) => {
697 let $simd = $crate::classification::simd::ResolvedSimd::<
698 $crate::classification::quotes::sse2_64::Constructor,
699 $crate::classification::structural::ssse3_64::Constructor,
700 $crate::classification::depth::sse2_64::Constructor,
701 $crate::classification::memmem::sse2_64::Constructor,
702 {$crate::classification::simd::SSSE3_PCLMULQDQ_POPCNT},
703 >::new();
704 $b
705 }
706 (true, false) => {
707 let $simd = $crate::classification::simd::ResolvedSimd::<
708 $crate::classification::quotes::sse2_64::Constructor,
709 $crate::classification::structural::ssse3_64::Constructor,
710 $crate::classification::depth::nosimd::Constructor,
711 $crate::classification::memmem::sse2_64::Constructor,
712 {$crate::classification::simd::SSSE3_PCLMULQDQ},
713 >::new();
714 $b
715 }
716 (false, true) => {
717 let $simd = $crate::classification::simd::ResolvedSimd::<
718 $crate::classification::quotes::nosimd::Constructor,
719 $crate::classification::structural::ssse3_64::Constructor,
720 $crate::classification::depth::sse2_64::Constructor,
721 $crate::classification::memmem::sse2_64::Constructor,
722 {$crate::classification::simd::SSSE3_POPCNT},
723 >::new();
724 $b
725 }
726 (false, false) => {
727 let $simd = $crate::classification::simd::ResolvedSimd::<
728 $crate::classification::quotes::nosimd::Constructor,
729 $crate::classification::structural::ssse3_64::Constructor,
730 $crate::classification::depth::nosimd::Constructor,
731 $crate::classification::memmem::sse2_64::Constructor,
732 {$crate::classification::simd::SSSE3},
733 >::new();
734 $b
735 }
736 }
737 }
738 $crate::classification::simd::SimdTag::Sse2 => {
739 // In SSE2 we need to check both advanced optimizations,
740 // and structural classifier is denied.
741 match (conf.fast_quotes(), conf.fast_popcnt()) {
742 (true, true) => {
743 let $simd = $crate::classification::simd::ResolvedSimd::<
744 $crate::classification::quotes::sse2_64::Constructor,
745 $crate::classification::structural::nosimd::Constructor,
746 $crate::classification::depth::sse2_64::Constructor,
747 $crate::classification::memmem::sse2_64::Constructor,
748 {$crate::classification::simd::SSE2_PCLMULQDQ_POPCNT},
749 >::new();
750 $b
751 }
752 (true, false) => {
753 let $simd = $crate::classification::simd::ResolvedSimd::<
754 $crate::classification::quotes::sse2_64::Constructor,
755 $crate::classification::structural::nosimd::Constructor,
756 $crate::classification::depth::nosimd::Constructor,
757 $crate::classification::memmem::sse2_64::Constructor,
758 {$crate::classification::simd::SSE2_PCLMULQDQ},
759 >::new();
760 $b
761 }
762 (false, true) => {
763 let $simd = $crate::classification::simd::ResolvedSimd::<
764 $crate::classification::quotes::nosimd::Constructor,
765 $crate::classification::structural::nosimd::Constructor,
766 $crate::classification::depth::sse2_64::Constructor,
767 $crate::classification::memmem::sse2_64::Constructor,
768 {$crate::classification::simd::SSE2_POPCNT},
769 >::new();
770 $b
771 }
772 (false, false) => {
773 let $simd = $crate::classification::simd::ResolvedSimd::<
774 $crate::classification::quotes::nosimd::Constructor,
775 $crate::classification::structural::nosimd::Constructor,
776 $crate::classification::depth::nosimd::Constructor,
777 $crate::classification::memmem::sse2_64::Constructor,
778 {$crate::classification::simd::SSE2},
779 >::new();
780 $b
781 }
782 }
783 }
784 // nosimd denies all optimizations.
785 $crate::classification::simd::SimdTag::Nosimd => {
786 let $simd = $crate::classification::simd::ResolvedSimd::<
787 $crate::classification::quotes::nosimd::Constructor,
788 $crate::classification::structural::nosimd::Constructor,
789 $crate::classification::depth::nosimd::Constructor,
790 $crate::classification::memmem::nosimd::Constructor,
791 {$crate::classification::simd::NOSIMD}
792 >::new();
793 $b
794 }
795 }
796 }
797 };
798 }
799 }
800 else if #[cfg(target_arch = "x86")] {
801 macro_rules! config_simd {
802 ($conf:expr => |$simd:ident| $b:block) => {
803 {
804 let conf = $conf;
805
806 match conf.highest_simd() {
807 // AVX2 implies all other optimizations.
808 $crate::classification::simd::SimdTag::Avx2 => {
809 assert!(conf.fast_quotes());
810 assert!(conf.fast_popcnt());
811 let $simd = $crate::classification::simd::ResolvedSimd::<
812 $crate::classification::quotes::avx2_32::Constructor,
813 $crate::classification::structural::avx2_32::Constructor,
814 $crate::classification::depth::avx2_32::Constructor,
815 $crate::classification::memmem::avx2_32::Constructor,
816 {$crate::classification::simd::AVX2_PCLMULQDQ_POPCNT},
817 >::new();
818 $b
819 }
820 $crate::classification::simd::SimdTag::Ssse3 => {
821 // In SSSE3 we need to check both advanced optimizations.
822 match (conf.fast_quotes(), conf.fast_popcnt()) {
823 (true, true) => {
824 let $simd = $crate::classification::simd::ResolvedSimd::<
825 $crate::classification::quotes::sse2_32::Constructor,
826 $crate::classification::structural::ssse3_32::Constructor,
827 $crate::classification::depth::sse2_32::Constructor,
828 $crate::classification::memmem::sse2_32::Constructor,
829 {$crate::classification::simd::SSSE3_PCLMULQDQ_POPCNT}
830 >::new();
831 $b
832 }
833 (true, false) => {
834 let $simd = $crate::classification::simd::ResolvedSimd::<
835 $crate::classification::quotes::sse2_32::Constructor,
836 $crate::classification::structural::ssse3_32::Constructor,
837 $crate::classification::depth::nosimd::Constructor,
838 $crate::classification::memmem::sse2_32::Constructor,
839 {$crate::classification::simd::SSSE3_PCLMULQDQ}
840 >::new();
841 $b
842 }
843 (false, true) => {
844 let $simd = $crate::classification::simd::ResolvedSimd::<
845 $crate::classification::quotes::nosimd::Constructor,
846 $crate::classification::structural::ssse3_32::Constructor,
847 $crate::classification::depth::sse2_32::Constructor,
848 $crate::classification::memmem::sse2_32::Constructor,
849 {$crate::classification::simd::SSSE3_POPCNT}
850 >::new();
851 $b
852 }
853 (false, false) => {
854 let $simd = $crate::classification::simd::ResolvedSimd::<
855 $crate::classification::quotes::nosimd::Constructor,
856 $crate::classification::structural::ssse3_32::Constructor,
857 $crate::classification::depth::nosimd::Constructor,
858 $crate::classification::memmem::sse2_32::Constructor,
859 {$crate::classification::simd::SSSE3}
860 >::new();
861 $b
862 }
863 }
864 }
865 $crate::classification::simd::SimdTag::Sse2 => {
866 // In SSE2 we need to check both advanced optimizations,
867 // and structural classifier is denied.
868 match (conf.fast_quotes(), conf.fast_popcnt()) {
869 (true, true) => {
870 let $simd = $crate::classification::simd::ResolvedSimd::<
871 $crate::classification::quotes::sse2_32::Constructor,
872 $crate::classification::structural::nosimd::Constructor,
873 $crate::classification::depth::sse2_32::Constructor,
874 $crate::classification::memmem::sse2_32::Constructor,
875 {$crate::classification::simd::SSE2_PCLMULQDQ_POPCNT}
876 >::new();
877 $b
878 }
879 (true, false) => {
880 let $simd = $crate::classification::simd::ResolvedSimd::<
881 $crate::classification::quotes::sse2_32::Constructor,
882 $crate::classification::structural::nosimd::Constructor,
883 $crate::classification::depth::nosimd::Constructor,
884 $crate::classification::memmem::sse2_32::Constructor,
885 {$crate::classification::simd::SSE2_PCLMULQDQ}
886 >::new();
887 $b
888 }
889 (false, true) => {
890 let $simd = $crate::classification::simd::ResolvedSimd::<
891 $crate::classification::quotes::nosimd::Constructor,
892 $crate::classification::structural::nosimd::Constructor,
893 $crate::classification::depth::sse2_32::Constructor,
894 $crate::classification::memmem::sse2_32::Constructor,
895 {$crate::classification::simd::SSE2_POPCNT}
896 >::new();
897 $b
898 }
899 (false, false) => {
900 let $simd = $crate::classification::simd::ResolvedSimd::<
901 $crate::classification::quotes::nosimd::Constructor,
902 $crate::classification::structural::nosimd::Constructor,
903 $crate::classification::depth::nosimd::Constructor,
904 $crate::classification::memmem::sse2_32::Constructor,
905 {$crate::classification::simd::SSE2}
906 >::new();
907 $b
908 }
909 }
910 }
911 // nosimd denies all optimizations.
912 $crate::classification::simd::SimdTag::Nosimd => {
913 let $simd = $crate::classification::simd::ResolvedSimd::<
914 $crate::classification::quotes::nosimd::Constructor,
915 $crate::classification::structural::nosimd::Constructor,
916 $crate::classification::depth::nosimd::Constructor,
917 $crate::classification::memmem::nosimd::Constructor,
918 {$crate::classification::simd::NOSIMD}
919 >::new();
920 $b
921 }
922 }
923 }
924 };
925 }
926 }
927 else {
928 macro_rules! config_simd {
929 ($conf:expr => |$simd:ident| $b:block) => {
930 {
931 let conf = $conf;
932 assert_eq!(conf.highest_simd(), $crate::classification::simd::SimdTag::Nosimd);
933 assert!(!conf.fast_quotes());
934 assert!(!conf.fast_popcnt());
935 let $simd = $crate::classification::simd::ResolvedSimd::<
936 $crate::classification::quotes::nosimd::Constructor,
937 $crate::classification::structural::nosimd::Constructor,
938 $crate::classification::depth::nosimd::Constructor,
939 $crate::classification::memmem::nosimd::Constructor,
940 {$crate::classification::simd::NOSIMD},
941 >::new();
942 $b
943 }
944 };
945 }
946 }
947}
948
949pub(crate) use config_simd;
950pub(crate) use dispatch_simd;